1 /**
2 * MojoShader; generate shader programs from bytecode of compiled
3 * Direct3D shaders.
4 *
5 * Please see the file LICENSE.txt in the source's root directory.
6 *
7 * This file written by Ryan C. Gordon.
8 */
9
10 // !!! FIXME: this file really needs to be split up.
11 // !!! FIXME: I keep changing coding styles for symbols and typedefs.
12
13 // !!! FIXME: rules from MSDN about temp registers we probably don't check.
14 // - There are limited temporaries: vs_1_1 has 12 (ps_1_1 has _2_!).
15 // - SM2 apparently was variable, between 12 and 32. Shader Model 3 has 32.
16 // - A maximum of three temp registers can be used in a single instruction.
17
18 #define __MOJOSHADER_INTERNAL__ 1
19 #include "mojoshader_internal.h"
20
21 typedef struct ConstantsList
22 {
23 MOJOSHADER_constant constant;
24 struct ConstantsList *next;
25 } ConstantsList;
26
27 typedef struct VariableList
28 {
29 MOJOSHADER_uniformType type;
30 int index;
31 int count;
32 ConstantsList *constant;
33 int used;
34 int emit_position; // used in some profiles.
35 struct VariableList *next;
36 } VariableList;
37
38 typedef struct RegisterList
39 {
40 RegisterType regtype;
41 int regnum;
42 MOJOSHADER_usage usage;
43 unsigned int index;
44 int writemask;
45 int misc;
46 int written;
47 const VariableList *array;
48 struct RegisterList *next;
49 } RegisterList;
50
51 typedef struct
52 {
53 const uint32 *token; // this is the unmolested token in the stream.
54 int regnum;
55 int swizzle; // xyzw (all four, not split out).
56 int swizzle_x;
57 int swizzle_y;
58 int swizzle_z;
59 int swizzle_w;
60 SourceMod src_mod;
61 RegisterType regtype;
62 int relative;
63 RegisterType relative_regtype;
64 int relative_regnum;
65 int relative_component;
66 const VariableList *relative_array;
67 } SourceArgInfo;
68
69 struct Profile; // predeclare.
70
71 typedef struct CtabData
72 {
73 int have_ctab;
74 int symbol_count;
75 MOJOSHADER_symbol *symbols;
76 } CtabData;
77
78 // Context...this is state that changes as we parse through a shader...
79 typedef struct Context
80 {
81 int isfail;
82 int out_of_memory;
83 MOJOSHADER_malloc malloc;
84 MOJOSHADER_free free;
85 void *malloc_data;
86 int current_position;
87 const uint32 *orig_tokens;
88 const uint32 *tokens;
89 uint32 tokencount;
90 const MOJOSHADER_swizzle *swizzles;
91 unsigned int swizzles_count;
92 const MOJOSHADER_samplerMap *samplermap;
93 unsigned int samplermap_count;
94 Buffer *output;
95 Buffer *preflight;
96 Buffer *globals;
97 Buffer *helpers;
98 Buffer *subroutines;
99 Buffer *mainline_intro;
100 Buffer *mainline;
101 Buffer *ignore;
102 Buffer *output_stack[2];
103 int indent_stack[2];
104 int output_stack_len;
105 int indent;
106 const char *shader_type_str;
107 const char *endline;
108 int endline_len;
109 int profileid;
110 const struct Profile *profile;
111 MOJOSHADER_shaderType shader_type;
112 uint8 major_ver;
113 uint8 minor_ver;
114 DestArgInfo dest_arg;
115 SourceArgInfo source_args[5];
116 SourceArgInfo predicate_arg; // for predicated instructions.
117 uint32 dwords[4];
118 uint32 version_token;
119 int instruction_count;
120 uint32 instruction_controls;
121 uint32 previous_opcode;
122 int coissue;
123 int loops;
124 int reps;
125 int max_reps;
126 int cmps;
127 int scratch_registers;
128 int max_scratch_registers;
129 int branch_labels_stack_index;
130 int branch_labels_stack[32];
131 int assigned_branch_labels;
132 int assigned_vertex_attributes;
133 int last_address_reg_component;
134 RegisterList used_registers;
135 RegisterList defined_registers;
136 ErrorList *errors;
137 int constant_count;
138 ConstantsList *constants;
139 int uniform_count;
140 int uniform_float4_count;
141 int uniform_int4_count;
142 int uniform_bool_count;
143 RegisterList uniforms;
144 int attribute_count;
145 RegisterList attributes;
146 int sampler_count;
147 RegisterList samplers;
148 VariableList *variables; // variables to register mapping.
149 int centroid_allowed;
150 CtabData ctab;
151 int have_relative_input_registers;
152 int have_multi_color_outputs;
153 int determined_constants_arrays;
154 int predicated;
155 int uses_pointsize;
156 int uses_fog;
157 int glsl_generated_lit_helper;
158 int glsl_generated_texldd_setup;
159 int glsl_generated_texm3x3spec_helper;
160 int arb1_wrote_position;
161 int have_preshader;
162 int ignores_ctab;
163 int reset_texmpad;
164 int texm3x2pad_dst0;
165 int texm3x2pad_src0;
166 int texm3x3pad_dst0;
167 int texm3x3pad_src0;
168 int texm3x3pad_dst1;
169 int texm3x3pad_src1;
170 MOJOSHADER_preshader *preshader;
171
172 #if SUPPORT_PROFILE_ARB1_NV
173 int profile_supports_nv2;
174 int profile_supports_nv3;
175 int profile_supports_nv4;
176 #endif
177 #if SUPPORT_PROFILE_GLSL120
178 int profile_supports_glsl120;
179 #endif
180 } Context;
181
182
183 // Use these macros so we can remove all bits of these profiles from the build.
// Feature queries for optional profiles. When a profile is compiled out, its
//  query is the constant 0, so all bits of that profile can be removed from
//  the build; otherwise it reads the matching flag in the Context.
#if !SUPPORT_PROFILE_ARB1_NV
#define support_nv2(ctx) (0)
#define support_nv3(ctx) (0)
#define support_nv4(ctx) (0)
#else
#define support_nv2(ctx) ((ctx)->profile_supports_nv2)
#define support_nv3(ctx) ((ctx)->profile_supports_nv3)
#define support_nv4(ctx) ((ctx)->profile_supports_nv4)
#endif

#if !SUPPORT_PROFILE_GLSL120
#define support_glsl120(ctx) (0)
#else
#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120)
#endif
199
200
201 // Profile entry points...
202
203 // one emit function for each opcode in each profile.
204 typedef void (*emit_function)(Context *ctx);
205
206 // one emit function for starting output in each profile.
207 typedef void (*emit_start)(Context *ctx, const char *profilestr);
208
209 // one emit function for ending output in each profile.
210 typedef void (*emit_end)(Context *ctx);
211
212 // one emit function for phase opcode output in each profile.
213 typedef void (*emit_phase)(Context *ctx);
214
215 // one emit function for finalizing output in each profile.
216 typedef void (*emit_finalize)(Context *ctx);
217
218 // one emit function for global definitions in each profile.
219 typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);
220
221 // one emit function for relative uniform arrays in each profile.
222 typedef void (*emit_array)(Context *ctx, VariableList *var);
223
224 // one emit function for relative constants arrays in each profile.
225 typedef void (*emit_const_array)(Context *ctx,
226 const struct ConstantsList *constslist,
227 int base, int size);
228
229 // one emit function for uniforms in each profile.
230 typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
231 const VariableList *var);
232
233 // one emit function for samplers in each profile.
234 typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
235 int texbem);
236
237 // one emit function for attributes in each profile.
238 typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
239 MOJOSHADER_usage usage, int index, int wmask,
240 int flags);
241
242 // one args function for each possible sequence of opcode arguments.
243 typedef int (*args_function)(Context *ctx);
244
245 // one state function for each opcode where we have state machine updates.
246 typedef void (*state_function)(Context *ctx);
247
248 // one function for varnames in each profile.
249 typedef const char *(*varname_function)(Context *c, RegisterType t, int num);
250
251 // one function for const var array in each profile.
252 typedef const char *(*const_array_varname_function)(Context *c, int base, int size);
253
254 typedef struct Profile
255 {
256 const char *name;
257 emit_start start_emitter;
258 emit_end end_emitter;
259 emit_phase phase_emitter;
260 emit_global global_emitter;
261 emit_array array_emitter;
262 emit_const_array const_array_emitter;
263 emit_uniform uniform_emitter;
264 emit_sampler sampler_emitter;
265 emit_attribute attribute_emitter;
266 emit_finalize finalize_emitter;
267 varname_function get_varname;
268 const_array_varname_function get_const_array_varname;
269 } Profile;
270
271
272 // Convenience functions for allocators...
#if !MOJOSHADER_FORCE_ALLOCATOR
// Allocator callback backed by the C runtime's malloc(). (d) is ignored.
void *MOJOSHADER_internal_malloc(int bytes, void *d)
{
    (void) d;
    return malloc(bytes);
} // MOJOSHADER_internal_malloc

// Deallocator callback backed by the C runtime's free(). (d) is ignored.
void MOJOSHADER_internal_free(void *ptr, void *d)
{
    (void) d;
    free(ptr);
} // MOJOSHADER_internal_free
#endif
277
278 MOJOSHADER_error MOJOSHADER_out_of_mem_error = {
279 "Out of memory", NULL, MOJOSHADER_POSITION_NONE
280 };
281
282 MOJOSHADER_parseData MOJOSHADER_out_of_mem_data = {
283 1, &MOJOSHADER_out_of_mem_error, 0, 0, 0, 0,
284 MOJOSHADER_TYPE_UNKNOWN, 0, 0, 0, 0
285 };
286
287
288 // !!! FIXME: cut and paste between every damned source file follows...
289 // !!! FIXME: We need to make some sort of ContextBase that applies to all
290 // !!! FIXME: files and move this stuff to mojoshader_common.c ...
291
// Mark the context as failed because an allocation failed.
static inline void out_of_memory(Context *ctx)
{
    ctx->out_of_memory = 1;
    ctx->isfail = 1;
} // out_of_memory
296
// Allocate (len) bytes through the context's allocator callback.
//  Returns NULL, after flagging the context as out of memory, on failure.
static inline void *Malloc(Context *ctx, const size_t len)
{
    void *ptr = ctx->malloc((int) len, ctx->malloc_data);
    if (ptr != NULL)
        return ptr;

    out_of_memory(ctx);
    return NULL;
} // Malloc
304
// Copy a NUL-terminated string into memory obtained from Malloc().
//  Returns NULL if the allocation failed (the context is flagged by Malloc).
static inline char *StrDup(Context *ctx, const char *str)
{
    char *copy = (char *) Malloc(ctx, strlen(str) + 1);
    if (copy == NULL)
        return NULL;

    strcpy(copy, str);
    return copy;
} // StrDup
312
// Release (ptr) through the context's deallocator callback.
static inline void Free(Context *ctx, void *ptr)
{
    void *userdata = ctx->malloc_data;
    ctx->free(ptr, userdata);
} // Free
317
MallocBridge(int bytes,void * data)318 static void *MallocBridge(int bytes, void *data)
319 {
320 return Malloc((Context *) data, (size_t) bytes);
321 } // MallocBridge
322
FreeBridge(void * ptr,void * data)323 static void FreeBridge(void *ptr, void *data)
324 {
325 Free((Context *) data, ptr);
326 } // FreeBridge
327
328
329 // jump between output sections in the context...
330
// Make (*section) the current output buffer, creating it on first use.
//  Returns 1 on success, 0 if the buffer could not be created (in which
//  case the current output is left untouched).
static int set_output(Context *ctx, Buffer **section)
{
    Buffer *buf = *section;
    if (buf == NULL)  // only create output sections on first use.
    {
        buf = buffer_create(256, MallocBridge, FreeBridge, ctx);
        *section = buf;
        if (buf == NULL)
            return 0;
    } // if

    ctx->output = buf;
    return 1;
} // set_output
344
// Save the current output buffer and indent level, then switch to (section)
//  with the indent reset to zero. If the switch fails, the saved entry stays
//  on the stack (so pop_output() still balances) and the indent is kept.
static void push_output(Context *ctx, Buffer **section)
{
    const int slot = ctx->output_stack_len;
    assert(slot < (int) (STATICARRAYLEN(ctx->output_stack)));

    ctx->output_stack[slot] = ctx->output;
    ctx->indent_stack[slot] = ctx->indent;
    ctx->output_stack_len = slot + 1;

    if (set_output(ctx, section))
        ctx->indent = 0;
} // push_output
355
// Restore the output buffer and indent level saved by the most recent
//  push_output().
static inline void pop_output(Context *ctx)
{
    assert(ctx->output_stack_len > 0);
    const int slot = --ctx->output_stack_len;
    ctx->output = ctx->output_stack[slot];
    ctx->indent = ctx->indent_stack[slot];
} // pop_output
363
364
365
366 // Shader model version magic...
367
ver_ui32(const uint8 major,const uint8 minor)368 static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
369 {
370 return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) );
371 } // version_ui32
372
shader_version_supported(const uint8 maj,const uint8 min)373 static inline int shader_version_supported(const uint8 maj, const uint8 min)
374 {
375 return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
376 } // shader_version_supported
377
shader_version_atleast(const Context * ctx,const uint8 maj,const uint8 min)378 static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
379 const uint8 min)
380 {
381 return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
382 } // shader_version_atleast
383
shader_version_exactly(const Context * ctx,const uint8 maj,const uint8 min)384 static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
385 const uint8 min)
386 {
387 return ((ctx->major_ver == maj) && (ctx->minor_ver == min));
388 } // shader_version_exactly
389
shader_is_pixel(const Context * ctx)390 static inline int shader_is_pixel(const Context *ctx)
391 {
392 return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
393 } // shader_is_pixel
394
shader_is_vertex(const Context * ctx)395 static inline int shader_is_vertex(const Context *ctx)
396 {
397 return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
398 } // shader_is_vertex
399
400
isfail(const Context * ctx)401 static inline int isfail(const Context *ctx)
402 {
403 return ctx->isfail;
404 } // isfail
405
406
// Flag the context as failed and record a printf-style error message at the
//  current token position. Once the context is out of memory, the flag is
//  still set but no message is recorded.
static void failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static void failf(Context *ctx, const char *fmt, ...)
{
    ctx->isfail = 1;
    if (!ctx->out_of_memory)
    {
        // no filename at this level (we pass a NULL to errorlist_add_va()...)
        va_list ap;
        va_start(ap, fmt);
        errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, ap);
        va_end(ap);
    } // if
} // failf
420
421
// Flag the context as failed with a fixed message. (reason) is passed as
//  data, not as a format string, so '%' characters in it are safe.
static inline void fail(Context *ctx, const char *reason)
{
    static const char passthrough[] = "%s";
    failf(ctx, passthrough, reason);
} // fail
426
427
// Append one printf-style formatted line to the current output buffer:
//  (ctx->indent) tab characters, the formatted text, then the profile's
//  end-of-line sequence. Does nothing once the context has failed.
static void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static void output_line(Context *ctx, const char *fmt, ...)
{
    assert(ctx->output != NULL);
    if (!isfail(ctx))  // if we failed previously, don't go on...
    {
        const int tabs = ctx->indent;
        if (tabs > 0)
        {
            char *pad = (char *) alloca(tabs);
            memset(pad, '\t', tabs);
            buffer_append(ctx->output, pad, tabs);
        } // if

        va_list ap;
        va_start(ap, fmt);
        buffer_append_va(ctx->output, fmt, ap);
        va_end(ap);

        buffer_append(ctx->output, ctx->endline, ctx->endline_len);
    } // if
} // output_line
450
451
// Append just the end-of-line sequence to the current output buffer.
//  Does nothing once the context has failed.
static inline void output_blank_line(Context *ctx)
{
    assert(ctx->output != NULL);
    if (isfail(ctx))
        return;

    buffer_append(ctx->output, ctx->endline, ctx->endline_len);
} // output_blank_line
458
459
460 // !!! FIXME: this is sort of nasty.
// Format (f) into (buf) with "%f" and strip trailing zeros from the
//  fraction. If the whole fraction is zeros, the decimal point is dropped
//  too, unless (leavedecimal) is non-zero, in which case exactly ".0" is
//  kept. Fails the context (leaving whatever snprintf wrote in buf) if the
//  buffer does not have at least two spare bytes beyond the formatted text.
static void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
                     int leavedecimal)
{
    const size_t len = snprintf(buf, bufsize, "%f", f);
    if ((len+2) >= bufsize)
    {
        fail(ctx, "BUG: internal buffer is too small");
        return;
    } // if

    char *dot = strchr(buf, '.');
    if (dot == NULL)
    {
        // no fraction at all; the two spare bytes checked above hold ".0".
        if (leavedecimal)
            strcat(buf, ".0");
        return;  // done.
    } // if

    // back up from the last character over any run of '0' characters,
    //  stopping at the decimal point.
    char *last = buf + len - 1;
    while ((last != dot) && (*last == '0'))
        last--;

    // (end) is where the terminator goes: on the '.' if the fraction was
    //  all zeros, otherwise right after the last significant digit.
    char *end = (last == dot) ? dot : (last + 1);
    if ((leavedecimal) && (end == dot))
        end += 2;  // keep the '.' and one '0'.
    *end = '\0';  // chop extra '0' or all decimal places off.
} // floatstr
491
cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)492 static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)
493 {
494 return (TextureType) (((int) type) + 2);
495 } // cvtMojoToD3DSamplerType
496
cvtD3DToMojoSamplerType(const TextureType type)497 static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type)
498 {
499 return (MOJOSHADER_samplerType) (((int) type) - 2);
500 } // cvtD3DToMojoSamplerType
501
502
503 // Deal with register lists... !!! FIXME: I sort of hate this.
504
// Free every node of a register list, starting at (item), by calling the
//  deallocator (f) with user data (d). (item) may be NULL.
static void free_reglist(MOJOSHADER_free f, void *d, RegisterList *item)
{
    RegisterList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab this before the node goes away.
        f(item, d);
    } // for
} // free_reglist
514
reg_to_ui32(const RegisterType regtype,const int regnum)515 static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum)
516 {
517 return ( ((uint32) regtype) | (((uint32) regnum) << 16) );
518 } // reg_to_uint32
519
520 // !!! FIXME: ditch this for a hash table.
// Find the entry for (regtype, regnum) in the sorted list headed by (prev),
//  or create it at its sorted position if it doesn't exist yet.
//
// (prev) is the list head; only its (next) pointer is used.
// Returns the existing or newly created entry, or NULL if a new entry was
//  needed and allocation failed (Malloc() flags the context in that case).
// A new entry starts fully initialized: unknown usage, zeroed
//  index/writemask/misc/written, and no array.
static RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
                                    const RegisterType regtype,
                                    const int regnum)
{
    const uint32 newval = reg_to_ui32(regtype, regnum);
    RegisterList *item = prev->next;
    while (item != NULL)
    {
        const uint32 val = reg_to_ui32(item->regtype, item->regnum);
        if (newval == val)
            return item;  // already set, so we're done.
        else if (newval < val)  // insert it here.
            break;
        else // if (newval > val)
        {
            // keep going, we're not to the insertion point yet.
            prev = item;
            item = item->next;
        } // else
    } // while

    // we need to insert an entry after (prev).
    item = (RegisterList *) Malloc(ctx, sizeof (RegisterList));
    if (item != NULL)
    {
        item->regtype = regtype;
        item->regnum = regnum;
        item->usage = MOJOSHADER_USAGE_UNKNOWN;
        item->index = 0;
        item->writemask = 0;
        item->misc = 0;
        // Malloc() doesn't zero memory, and register_was_written() reads
        //  this field, so it must be cleared here.
        item->written = 0;
        item->array = NULL;
        item->next = prev->next;
        prev->next = item;
    } // if

    return item;
} // reglist_insert
559
reglist_find(const RegisterList * prev,const RegisterType rtype,const int regnum)560 static RegisterList *reglist_find(const RegisterList *prev,
561 const RegisterType rtype, const int regnum)
562 {
563 const uint32 newval = reg_to_ui32(rtype, regnum);
564 RegisterList *item = prev->next;
565 while (item != NULL)
566 {
567 const uint32 val = reg_to_ui32(item->regtype, item->regnum);
568 if (newval == val)
569 return item; // here it is.
570 else if (newval < val) // should have been here if it existed.
571 return NULL;
572 else // if (newval > val)
573 item = item->next;
574 } // while
575
576 return NULL; // wasn't in the list.
577 } // reglist_find
578
// Same lookup as reglist_find(), but hands back a read-only entry
//  (NULL if the register isn't in the list).
static inline const RegisterList *reglist_exists(RegisterList *prev,
                                                 const RegisterType regtype,
                                                 const int regnum)
{
    const RegisterList *found = reglist_find(prev, regtype, regnum);
    return found;
} // reglist_exists
585
// Returns 1 if the register is in the used-registers list and has been
//  marked as written, 0 otherwise.
static inline int register_was_written(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *reg = reglist_find(&ctx->used_registers, rtype, regnum);
    if (reg == NULL)
        return 0;
    return (reg->written != 0);
} // register_was_written
592
set_used_register(Context * ctx,const RegisterType regtype,const int regnum,const int written)593 static inline RegisterList *set_used_register(Context *ctx,
594 const RegisterType regtype,
595 const int regnum,
596 const int written)
597 {
598 RegisterList *reg = NULL;
599 if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0))
600 ctx->have_multi_color_outputs = 1;
601
602 reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum);
603 if (reg && written)
604 reg->written = 1;
605 return reg;
606 } // set_used_register
607
// Returns 1 if the register is in the used-registers list, 0 otherwise.
static inline int get_used_register(Context *ctx, const RegisterType regtype,
                                    const int regnum)
{
    const RegisterList *entry = reglist_exists(&ctx->used_registers,
                                               regtype, regnum);
    return (entry != NULL);
} // get_used_register
613
// Record that a register has been defined. An allocation failure is only
//  visible through the context's failure flags (set inside Malloc()).
static inline void set_defined_register(Context *ctx, const RegisterType rtype,
                                        const int regnum)
{
    RegisterList *head = &ctx->defined_registers;
    reglist_insert(ctx, head, rtype, regnum);
} // set_defined_register
619
// Returns 1 if the register is in the defined-registers list, 0 otherwise.
static inline int get_defined_register(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *entry = reglist_exists(&ctx->defined_registers,
                                               rtype, regnum);
    return (entry != NULL);
} // get_defined_register
625
// Add (or update) an entry in the context's attribute list and fill in its
//  usage, usage index, writemask and flags (flags are stored in (misc)).
//  Output registers with POINTSIZE or FOG usage also set the context's
//  uses_pointsize / uses_fog flags, which isscalar() consults.
//
// If the list entry can't be allocated, nothing is recorded; Malloc() has
//  already flagged the context as failed/out of memory by then.
static void add_attribute_register(Context *ctx, const RegisterType rtype,
                                const int regnum, const MOJOSHADER_usage usage,
                                const int index, const int writemask, int flags)
{
    RegisterList *item = reglist_insert(ctx, &ctx->attributes, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; reglist_insert() returns NULL in that case.

    item->usage = usage;
    item->index = index;
    item->writemask = writemask;
    item->misc = flags;

    if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_POINTSIZE))
        ctx->uses_pointsize = 1;  // note that we have to check this later.
    else if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_FOG))
        ctx->uses_fog = 1;  // note that we have to check this later.
} // add_attribute_register
641
// Add (or update) a sampler register in the context's sampler list.
//
// If the caller supplied a sampler map with an entry whose index matches
//  (regnum), that entry's type overrides (ttype). The resulting texture
//  type is stored in the entry's (index) field, and (texbem) is OR'd into
//  (misc), so repeated adds accumulate bits.
//
// If the list entry can't be allocated, nothing is recorded; Malloc() has
//  already flagged the context as failed/out of memory by then.
static inline void add_sampler(Context *ctx, const int regnum,
                               TextureType ttype, const int texbem)
{
    const RegisterType rtype = REG_TYPE_SAMPLER;

    // !!! FIXME: make sure it doesn't exist?
    // !!! FIXME:  (ps_1_1 assume we can add it multiple times...)
    RegisterList *item = reglist_insert(ctx, &ctx->samplers, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; reglist_insert() returns NULL in that case.

    if (ctx->samplermap != NULL)
    {
        unsigned int i;
        for (i = 0; i < ctx->samplermap_count; i++)
        {
            if (ctx->samplermap[i].index == regnum)
            {
                ttype = cvtMojoToD3DSamplerType(ctx->samplermap[i].type);
                break;
            } // if
        } // for
    } // if

    item->index = (int) ttype;
    item->misc |= texbem;
} // add_sampler
667
668
// Non-zero if all four components are written (no explicit mask).
static inline int writemask_xyzw(const int writemask)
{
    const int full = 0xF;  // binary 1111: x, y, z and w.
    return (writemask == full);
} // writemask_xyzw
673
674
// Non-zero if the mask is exactly .xyz.
static inline int writemask_xyz(const int writemask)
{
    const int xyz = 0x7;  // binary 0111.
    return (writemask == xyz);
} // writemask_xyz
679
680
// Non-zero if the mask is exactly .xy.
static inline int writemask_xy(const int writemask)
{
    const int xy = 0x3;  // binary 0011.
    return (writemask == xy);
} // writemask_xy
685
686
// Non-zero if the mask is exactly .x.
static inline int writemask_x(const int writemask)
{
    const int x = 0x1;  // binary 0001.
    return (writemask == x);
} // writemask_x
691
692
// Non-zero if the mask is exactly .y.
static inline int writemask_y(const int writemask)
{
    const int y = 0x2;  // binary 0010.
    return (writemask == y);
} // writemask_y
697
698
// Non-zero if all four 2-bit selectors in the low 8 bits of (swizzle) are
//  equal, i.e. one component is replicated (.xxxx, .yyyy, ...).
static inline int replicate_swizzle(const int swizzle)
{
    const int sel0 = (swizzle >> 0) & 0x3;
    const int sel1 = (swizzle >> 2) & 0x3;
    const int sel2 = (swizzle >> 4) & 0x3;
    const int sel3 = (swizzle >> 6) & 0x3;
    return ( (sel0 == sel1) && (sel1 == sel2) && (sel2 == sel3) );
} // replicate_swizzle
705
706
// Non-zero if (swizzle) is the identity swizzle (.xyzw).
static inline int no_swizzle(const int swizzle)
{
    const int identity = 0xE4;  // binary 11100100: selectors 0 1 2 3.
    return (swizzle == identity);
} // no_swizzle
711
712
// Count how many of the four low bits of (m) are set: the number of
//  components a writemask selects. Higher bits are ignored.
static inline int vecsize_from_writemask(const int m)
{
    int count = 0;
    int bit;
    for (bit = 0; bit < 4; bit++)
        count += (m >> bit) & 1;
    return count;
} // vecsize_from_writemask
717
718
// Store a writemask in a destination argument: the combined mask, plus one
//  0/1 flag per component (bit 0 = writemask0 ... bit 3 = writemask3).
static inline void set_dstarg_writemask(DestArgInfo *dst, const int mask)
{
    dst->writemask = mask;
    dst->writemask0 = (mask & 0x1) ? 1 : 0;
    dst->writemask1 = (mask & 0x2) ? 1 : 0;
    dst->writemask2 = (mask & 0x4) ? 1 : 0;
    dst->writemask3 = (mask & 0x8) ? 1 : 0;
} // set_dstarg_writemask
727
728
// Hand out the next scratch register number and bump the in-use count.
//  max_scratch_registers tracks the highest in-use count seen so far.
static int allocate_scratch_register(Context *ctx)
{
    const int regnum = ctx->scratch_registers;
    const int in_use = regnum + 1;

    ctx->scratch_registers = in_use;
    if (ctx->max_scratch_registers <= regnum)
        ctx->max_scratch_registers = in_use;

    return regnum;
} // allocate_scratch_register
736
// Hand out the next unused branch label number (starting from whatever
//  assigned_branch_labels currently holds, counting up by one).
static int allocate_branch_label(Context *ctx)
{
    const int label = ctx->assigned_branch_labels;
    ctx->assigned_branch_labels = label + 1;
    return label;
} // allocate_branch_label
741
// Move the parse position by (incr) tokens: the token pointer and the
//  remaining-token count move by tokens, the error-reporting position
//  moves by the same distance in bytes.
static inline void adjust_token_position(Context *ctx, const int incr)
{
    ctx->current_position += incr * sizeof (uint32);
    ctx->tokencount -= incr;
    ctx->tokens += incr;
} // adjust_token_position
748
749
750 // D3D stuff that's used in more than just the d3d profile...
751
// Decide whether a register holds a scalar rather than a vector.
//
// Output registers that were declared as attributes are handled here when
//  the shader uses point size and/or fog: such a register is scalar exactly
//  when its declared usage is POINTSIZE or FOG (and that feature is in
//  use). Everything else is left to scalar_register().
static int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type,
                    const RegisterType rtype, const int rnum)
{
    if (rtype == REG_TYPE_OUTPUT)
    {
        const int psize = ctx->uses_pointsize;
        const int fog = ctx->uses_fog;
        if (psize || fog)
        {
            const RegisterList *reg = reglist_find(&ctx->attributes, rtype, rnum);
            if (reg != NULL)
            {
                if (psize && (reg->usage == MOJOSHADER_USAGE_POINTSIZE))
                    return 1;
                if (fog && (reg->usage == MOJOSHADER_USAGE_FOG))
                    return 1;
                return 0;
            } // if
        } // if
    } // if

    return scalar_register(shader_type, rtype, rnum);
} // isscalar
770
// Component letters, indexed by a 2-bit swizzle selector. This is a plain
//  four-character array, not a NUL-terminated string.
static const char swizzle_channels[] = {
    'x',
    'y',
    'z',
    'w'
};


// Name suffixes for attribute usages; presumably indexed by
//  MOJOSHADER_usage value (the indexing code is not in this part of the file).
static const char *usagestrs[] = {
    "_position",
    "_blendweight",
    "_blendindices",
    "_normal",
    "_psize",
    "_texcoord",
    "_tangent",
    "_binormal",
    "_tessfactor",
    "_positiont",
    "_color",
    "_fog",
    "_depth",
    "_sample"
};
779
// Get the D3D assembly name for a register.
//
// Returns the register's name prefix ("r", "oPos", ...) and writes the
//  register number, as decimal text, into (regnum_str). Registers that are
//  not numbered get an empty (regnum_str).
// Returns NULL for a RASTOUT or MISCTYPE register whose number isn't one of
//  the known values; callers must check for that.
// An unknown register type fails the context and yields "???".
static const char *get_D3D_register_string(Context *ctx,
                                           RegisterType regtype,
                                           int regnum, char *regnum_str,
                                           size_t regnum_size)
{
    const char *name = NULL;
    int numbered = 1;

    switch (regtype)
    {
        // plain numbered registers.
        case REG_TYPE_TEMP:      name = "r";  break;
        case REG_TYPE_INPUT:     name = "v";  break;
        case REG_TYPE_CONST:     name = "c";  break;
        case REG_TYPE_ATTROUT:   name = "oD"; break;
        case REG_TYPE_CONSTINT:  name = "i";  break;
        case REG_TYPE_COLOROUT:  name = "oC"; break;
        case REG_TYPE_SAMPLER:   name = "s";  break;
        case REG_TYPE_CONSTBOOL: name = "b";  break;
        case REG_TYPE_LABEL:     name = "l";  break;
        case REG_TYPE_PREDICATE: name = "p";  break;

        // numbered registers whose name depends on the shader.
        case REG_TYPE_ADDRESS:  // (or REG_TYPE_TEXTURE, same value.)
            if (shader_is_vertex(ctx))
                name = "a";
            else
                name = "t";
            break;

        case REG_TYPE_OUTPUT:  // (or REG_TYPE_TEXCRDOUT, same value.)
            name = (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0))
                       ? "o" : "oT";
            break;

        // registers without a number.
        case REG_TYPE_DEPTHOUT:
            name = "oDepth";
            numbered = 0;
            break;

        case REG_TYPE_LOOP:
            name = "aL";
            numbered = 0;
            break;

        // registers where the number selects the name; (name) stays NULL
        //  if the number isn't recognized.
        case REG_TYPE_RASTOUT:
            numbered = 0;
            switch ((RastOutType) regnum)
            {
                case RASTOUT_TYPE_POSITION: name = "oPos"; break;
                case RASTOUT_TYPE_FOG: name = "oFog"; break;
                case RASTOUT_TYPE_POINT_SIZE: name = "oPts"; break;
            } // switch
            break;

        case REG_TYPE_MISCTYPE:
            numbered = 0;
            switch ((MiscTypeType) regnum)
            {
                case MISCTYPE_TYPE_POSITION: name = "vPos"; break;
                case MISCTYPE_TYPE_FACE: name = "vFace"; break;
            } // switch
            break;

        //case REG_TYPE_TEMPFLOAT16:  // !!! FIXME: don't know this asm string
        default:
            fail(ctx, "unknown register type");
            name = "???";
            numbered = 0;
            break;
    } // switch

    if (!numbered)
        regnum_str[0] = '\0';
    else
        snprintf(regnum_str, regnum_size, "%u", (uint) regnum);

    return name;
} // get_D3D_register_string
885
886
887 // !!! FIXME: can we split the profile code out to separate source files?
888
889 #define AT_LEAST_ONE_PROFILE 0
890
891 #if !SUPPORT_PROFILE_D3D
892 #define PROFILE_EMITTER_D3D(op)
893 #else
894 #undef AT_LEAST_ONE_PROFILE
895 #define AT_LEAST_ONE_PROFILE 1
896 #define PROFILE_EMITTER_D3D(op) emit_D3D_##op,
897
// Build the D3D assembly text for one source argument into (buf).
//
// The result is laid out as:
//   <pre-modifier><register><number><post-modifier>[<relative reg>.<comp>]<swizzle>
//  for example "-r0_bias.xyz" or "c12[a0.x]".
//
// Returns (buf). If the register (or the relative-addressing register)
//  has no known name, the context is failed and (buf) is set to "".
static const char *make_D3D_srcarg_string_in_buf(Context *ctx,
                                                 const SourceArgInfo *arg,
                                                 char *buf, size_t buflen)
{
    // source modifiers are written partly before and partly after the
    //  register name; the "negate" variants add a leading '-'.
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (arg->src_mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            postmod_str = "_bias";
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            postmod_str = "_bx2";
            break;

        case SRCMOD_COMPLEMENT:
            premod_str = "1-";
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_X2:
            postmod_str = "_x2";
            break;

        case SRCMOD_DZ:
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_ABS:
            postmod_str = "_abs";
            break;

        case SRCMOD_NOT:
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
             break;  // stop compiler whining.
    } // switch


    char regnum_str[16];
    const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
                                                      arg->regnum, regnum_str,
                                                      sizeof (regnum_str));

    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown source register type.");
        *buf = '\0';
        return buf;
    } // if

    const char *rel_lbracket = "";
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    char rel_regnum_str[16] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';
        rel_lbracket = "[";
        rel_rbracket = "]";
        rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype,
                                                  arg->relative_regnum,
                                                  rel_regnum_str,
                                                  sizeof (rel_regnum_str));

        // check the string we just looked up, not the main register's:
        //  a NULL here would otherwise be handed to snprintf()'s "%s".
        if (rel_regtype_str == NULL)
        {
            fail(ctx, "Unknown relative source register type.");
            *buf = '\0';
            return buf;
        } // if
    } // if

    // scalar registers and the identity swizzle get no swizzle suffix.
    char swizzle_str[6];
    size_t i = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !no_swizzle(arg->swizzle))
    {
        swizzle_str[i++] = '.';
        swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_w];

        // .xyzz is the same as .xyz, .z is the same as .zzzz, etc.
        //  (this stops at the latest when it compares against the '.')
        while (swizzle_str[i-1] == swizzle_str[i-2])
            i--;
    } // if
    swizzle_str[i] = '\0';
    assert(i < sizeof (swizzle_str));

    // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12]
    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s",
             premod_str, regtype_str, regnum_str, postmod_str,
             rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle,
             rel_rbracket, swizzle_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_D3D_srcarg_string_in_buf
1023
1024
// Build the D3D assembly text for the current instruction's destination
//  argument (ctx->dest_arg) into (buf).
//
// The text starts with the instruction-name suffixes (result shift, _sat,
//  _pp, _centroid), then a space, then the optional predicate in
//  parentheses, then the register with its writemask.
// Returns (buf). If the register has no known name, the context is failed
//  and (buf) is set to "".
static const char *make_D3D_destarg_string(Context *ctx, char *buf,
                                           const size_t buflen)
{
    const DestArgInfo *dst = &ctx->dest_arg;

    // result shift: multiply (_xN) or divide (_dN) suffix, if any.
    const char *shift = "";
    switch (dst->result_shift)
    {
        case 0x1: shift = "_x2"; break;
        case 0x2: shift = "_x4"; break;
        case 0x3: shift = "_x8"; break;
        case 0xD: shift = "_d8"; break;
        case 0xE: shift = "_d4"; break;
        case 0xF: shift = "_d2"; break;
    } // switch

    // result modifier flags.
    const char *sat = "";
    const char *pp = "";
    const char *centroid = "";
    if (dst->result_mod & MOD_SATURATE)
        sat = "_sat";
    if (dst->result_mod & MOD_PP)
        pp = "_pp";
    if (dst->result_mod & MOD_CENTROID)
        centroid = "_centroid";

    char numstr[16];
    const char *typestr = get_D3D_register_string(ctx, dst->regtype,
                                                  dst->regnum, numstr,
                                                  sizeof (numstr));
    if (typestr == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        *buf = '\0';
        return buf;
    } // if

    // scalar registers and full (.xyzw) masks get no writemask suffix.
    char mask[6];
    size_t len = 0;
    if (!isscalar(ctx, ctx->shader_type, dst->regtype, dst->regnum) &&
        !writemask_xyzw(dst->writemask))
    {
        mask[len++] = '.';
        if (dst->writemask0) mask[len++] = 'x';
        if (dst->writemask1) mask[len++] = 'y';
        if (dst->writemask2) mask[len++] = 'z';
        if (dst->writemask3) mask[len++] = 'w';
    } // if
    mask[len] = '\0';
    assert(len < sizeof (mask));

    // predicated instructions get "(<predicate>) " before the register.
    char pred[32] = { '\0' };
    const char *pred_open = "";
    const char *pred_close = "";
    if (ctx->predicated)
    {
        pred_open = "(";
        pred_close = ") ";
        make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
                                      pred, sizeof (pred));
    } // if

    // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ...
    snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s",
             shift, sat, pp, centroid,
             pred_open, pred, pred_close,
             typestr, numstr, mask);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_D3D_destarg_string
1089
1090
// Render source argument number (idx) of the current instruction as D3D
//  assembly text into (buf) and return (buf). An out-of-range (idx) calls
//  fail() and produces an empty string.
static const char *make_D3D_srcarg_string(Context *ctx, const size_t idx,
                                          char *buf, size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        return make_D3D_srcarg_string_in_buf(ctx, &ctx->source_args[idx],
                                             buf, buflen);
    } // if

    fail(ctx, "Too many source args");
    buf[0] = '\0';
    return buf;
} // make_D3D_srcarg_string
1104
// Write the D3D name of register (rt)/(regnum) into (buf), as the register
//  type string followed by the register number string. Returns (buf).
static const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt,
                                          int regnum, char *buf,
                                          const size_t len)
{
    char number_text[16];
    const char *type_text;

    type_text = get_D3D_register_string(ctx, rt, regnum, number_text,
                                        sizeof (number_text));
    snprintf(buf, len, "%s%s", type_text, number_text);
    return buf;
} // get_D3D_varname_in_buf
1115
1116
// Build the D3D name of register (rt)/(regnum) in a local scratch buffer
//  and return the copy made by StrDup().
static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum)
{
    char scratch[64];
    const char *name = get_D3D_varname_in_buf(ctx, rt, regnum,
                                              scratch, sizeof (scratch));
    return StrDup(ctx, name);
} // get_D3D_varname
1123
1124
// Produce the name "c_array_<base>_<size>" for a constant array and return
//  the copy made by StrDup().
static const char *get_D3D_const_array_varname(Context *ctx, int base, int size)
{
    char scratch[64];

    snprintf(scratch, sizeof (scratch), "c_array_%d_%d", base, size);

    return StrDup(ctx, scratch);
} // get_D3D_const_array_varname
1131
1132
// Emit the first line of a D3D assembly listing:
//  "<shader type>_<major>_<minor>", where the minor part is "sw" for 0xFF,
//  "x" for minor version 1 when major is above 1, and the plain number
//  otherwise. Also marks the context as ignoring the CTAB.
// (profilestr) is not used here.
static void emit_D3D_start(Context *ctx, const char *profilestr)
{
    const uint major_ver = (uint) ctx->major_ver;
    const uint minor_ver = (uint) ctx->minor_ver;
    char minor_text[16];

    ctx->ignores_ctab = 1;

    if (minor_ver == 0xFF)
        snprintf(minor_text, sizeof (minor_text), "%s", "sw");
    else if ((minor_ver == 1) && (major_ver > 1))
        snprintf(minor_text, sizeof (minor_text), "%s", "x");  // for >= SM2, apparently this is "x". Weird.
    else
        snprintf(minor_text, sizeof (minor_text), "%u", (uint) minor_ver);

    output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major_ver, minor_text);
} // emit_D3D_start
1150
1151
// Emit the closing "end" line of a D3D assembly listing.
static void emit_D3D_end(Context *ctx)
{
    output_line(ctx, "end");
} // emit_D3D_end


// Emit a "phase" line.
static void emit_D3D_phase(Context *ctx)
{
    output_line(ctx, "phase");
} // emit_D3D_phase


// The emitters from here through emit_D3D_attribute have empty bodies: the
//  D3D profile writes nothing for these hooks.

// Finalize hook; nothing to do for D3D output.
static void emit_D3D_finalize(Context *ctx)
{
    // no-op.
} // emit_D3D_finalize


// Global register hook; nothing to do for D3D output.
static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum)
{
    // no-op.
} // emit_D3D_global


// Array hook; nothing to do for D3D output.
static void emit_D3D_array(Context *ctx, VariableList *var)
{
    // no-op.
} // emit_D3D_array


// Constant array hook; nothing to do for D3D output.
static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist,
                                 int base, int size)
{
    // no-op.
} // emit_D3D_const_array


// Uniform hook; nothing to do for D3D output.
static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum,
                             const VariableList *var)
{
    // no-op.
} // emit_D3D_uniform


// Sampler hook; nothing to do for D3D output.
static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb)
{
    // no-op.
} // emit_D3D_sampler


// Attribute hook; nothing to do for D3D output.
static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
                               MOJOSHADER_usage usage, int index, int wmask,
                               int flags)
{
    // no-op.
} // emit_D3D_attribute


// Emitter for the RESERVED opcode slot; writes nothing.
static void emit_D3D_RESERVED(Context *ctx)
{
    // do nothing; fails in the state machine.
} // emit_D3D_RESERVED
1214
1215
// Generic D3D opcode emitters. A list of macros generates all the entry
//  points that call into these...
1218
// Copy the NUL-terminated string (src) into (dst), converting 'A'..'Z' to
//  'a'..'z' and leaving every other character alone. The terminator is
//  copied too, so (dst) needs room for strlen(src) + 1 chars.
// Returns (dst).
static char *lowercase(char *dst, const char *src)
{
    char *out = dst;

    for (;;)
    {
        const char ch = *src++;
        const int is_upper = ((ch >= 'A') && (ch <= 'Z'));
        *out++ = is_upper ? (char) (ch + ('a' - 'A')) : ch;
        if (ch == '\0')
            break;  // terminator has been written; we're done.
    } // for

    return dst;
} // lowercase
1229
1230
// Emit an instruction that has only a destination argument:
//  "<opcode><dest>", with (opcode) lowercased and a leading "+" when
//  ctx->coissue is set.
static void emit_D3D_opcode_d(Context *ctx, const char *opcode)
{
    char dest_text[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s", prefix, mnemonic, dest_text);
} // emit_D3D_opcode_d
1237
1238
// Emit an instruction that has a single source argument and no destination:
//  "<opcode> <src0>", with (opcode) lowercased and a leading "+" when
//  ctx->coissue is set.
static void emit_D3D_opcode_s(Context *ctx, const char *opcode)
{
    char arg0[64];
    make_D3D_srcarg_string(ctx, 0, arg0, sizeof (arg0));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s %s", prefix, mnemonic, arg0);
} // emit_D3D_opcode_s
1245
1246
// Emit an instruction that has two source arguments and no destination:
//  "<opcode> <src0>, <src1>", with (opcode) lowercased and a leading "+"
//  when ctx->coissue is set.
static void emit_D3D_opcode_ss(Context *ctx, const char *opcode)
{
    char arg0[64];
    char arg1[64];
    make_D3D_srcarg_string(ctx, 0, arg0, sizeof (arg0));
    make_D3D_srcarg_string(ctx, 1, arg1, sizeof (arg1));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s %s, %s", prefix, mnemonic, arg0, arg1);
} // emit_D3D_opcode_ss
1254
1255
// Emit an instruction with a destination and one source argument:
//  "<opcode><dest>, <src0>", with (opcode) lowercased and a leading "+"
//  when ctx->coissue is set.
static void emit_D3D_opcode_ds(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_D3D_srcarg_string(ctx, 0, arg0, sizeof (arg0));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s", prefix, mnemonic, dest_text, arg0);
} // emit_D3D_opcode_ds
1263
1264
// Emit an instruction with a destination and two source arguments:
//  "<opcode><dest>, <src0>, <src1>", with (opcode) lowercased and a leading
//  "+" when ctx->coissue is set.
static void emit_D3D_opcode_dss(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0[64];
    char arg1[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_D3D_srcarg_string(ctx, 0, arg0, sizeof (arg0));
    make_D3D_srcarg_string(ctx, 1, arg1, sizeof (arg1));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s, %s", prefix, mnemonic, dest_text,
                arg0, arg1);
} // emit_D3D_opcode_dss
1274
1275
// Emit an instruction with a destination and three source arguments:
//  "<opcode><dest>, <src0>, <src1>, <src2>", with (opcode) lowercased and a
//  leading "+" when ctx->coissue is set.
static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0[64];
    char arg1[64];
    char arg2[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_D3D_srcarg_string(ctx, 0, arg0, sizeof (arg0));
    make_D3D_srcarg_string(ctx, 1, arg1, sizeof (arg1));
    make_D3D_srcarg_string(ctx, 2, arg2, sizeof (arg2));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s, %s, %s", prefix, mnemonic, dest_text,
                arg0, arg1, arg2);
} // emit_D3D_opcode_dsss
1286
1287
// Emit an instruction with a destination and four source arguments:
//  "<opcode><dest>, <src0>, <src1>, <src2>, <src3>", with (opcode)
//  lowercased and a leading "+" when ctx->coissue is set.
static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0[64];
    char arg1[64];
    char arg2[64];
    char arg3[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_D3D_srcarg_string(ctx, 0, arg0, sizeof (arg0));
    make_D3D_srcarg_string(ctx, 1, arg1, sizeof (arg1));
    make_D3D_srcarg_string(ctx, 2, arg2, sizeof (arg2));
    make_D3D_srcarg_string(ctx, 3, arg3, sizeof (arg3));

    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx,"%s%s%s, %s, %s, %s, %s", prefix, mnemonic, dest_text,
                arg0, arg1, arg2, arg3);
} // emit_D3D_opcode_dssss
1299
1300
// Emit an instruction that takes no arguments: just the lowercased
//  (opcode), with a leading "+" when ctx->coissue is set.
static void emit_D3D_opcode(Context *ctx, const char *opcode)
{
    // lowercase into stack scratch space; (opcode) itself is left untouched.
    char *mnemonic = (char *) alloca(strlen(opcode) + 1);
    lowercase(mnemonic, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s", prefix, mnemonic);
} // emit_D3D_opcode
1306
1307
// Each macro below defines "static void emit_D3D_<op>(Context *ctx)" as a
//  one-line forwarder to a generic emitter, passing the stringized opcode
//  name (#op). The macro name's suffix picks the generic emitter that gets
//  called:
//    (none) -> emit_D3D_opcode         _D     -> emit_D3D_opcode_d
//    _S     -> emit_D3D_opcode_s       _SS    -> emit_D3D_opcode_ss
//    _DS    -> emit_D3D_opcode_ds      _DSS   -> emit_D3D_opcode_dss
//    _DSSS  -> emit_D3D_opcode_dsss    _DSSSS -> emit_D3D_opcode_dssss
#define EMIT_D3D_OPCODE_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_D_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_d(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_S_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_s(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_SS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_ss(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_ds(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_dss(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_dsss(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_dssss(ctx, #op); \
    }

// One emitter per opcode whose D3D text is just "opcode + arguments". The
//  opcode names are written in uppercase here; the generic emitters
//  lowercase them before output.
EMIT_D3D_OPCODE_FUNC(NOP)
EMIT_D3D_OPCODE_DS_FUNC(MOV)
EMIT_D3D_OPCODE_DSS_FUNC(ADD)
EMIT_D3D_OPCODE_DSS_FUNC(SUB)
EMIT_D3D_OPCODE_DSSS_FUNC(MAD)
EMIT_D3D_OPCODE_DSS_FUNC(MUL)
EMIT_D3D_OPCODE_DS_FUNC(RCP)
EMIT_D3D_OPCODE_DS_FUNC(RSQ)
EMIT_D3D_OPCODE_DSS_FUNC(DP3)
EMIT_D3D_OPCODE_DSS_FUNC(DP4)
EMIT_D3D_OPCODE_DSS_FUNC(MIN)
EMIT_D3D_OPCODE_DSS_FUNC(MAX)
EMIT_D3D_OPCODE_DSS_FUNC(SLT)
EMIT_D3D_OPCODE_DSS_FUNC(SGE)
EMIT_D3D_OPCODE_DS_FUNC(EXP)
EMIT_D3D_OPCODE_DS_FUNC(LOG)
EMIT_D3D_OPCODE_DS_FUNC(LIT)
EMIT_D3D_OPCODE_DSS_FUNC(DST)
EMIT_D3D_OPCODE_DSSS_FUNC(LRP)
EMIT_D3D_OPCODE_DS_FUNC(FRC)
EMIT_D3D_OPCODE_DSS_FUNC(M4X4)
EMIT_D3D_OPCODE_DSS_FUNC(M4X3)
EMIT_D3D_OPCODE_DSS_FUNC(M3X4)
EMIT_D3D_OPCODE_DSS_FUNC(M3X3)
EMIT_D3D_OPCODE_DSS_FUNC(M3X2)
EMIT_D3D_OPCODE_S_FUNC(CALL)
EMIT_D3D_OPCODE_SS_FUNC(CALLNZ)
EMIT_D3D_OPCODE_SS_FUNC(LOOP)
EMIT_D3D_OPCODE_FUNC(RET)
EMIT_D3D_OPCODE_FUNC(ENDLOOP)
EMIT_D3D_OPCODE_S_FUNC(LABEL)
EMIT_D3D_OPCODE_DSS_FUNC(POW)
EMIT_D3D_OPCODE_DSS_FUNC(CRS)
EMIT_D3D_OPCODE_DSSS_FUNC(SGN)
EMIT_D3D_OPCODE_DS_FUNC(ABS)
EMIT_D3D_OPCODE_DS_FUNC(NRM)
EMIT_D3D_OPCODE_S_FUNC(REP)
EMIT_D3D_OPCODE_FUNC(ENDREP)
EMIT_D3D_OPCODE_S_FUNC(IF)
EMIT_D3D_OPCODE_FUNC(ELSE)
EMIT_D3D_OPCODE_FUNC(ENDIF)
EMIT_D3D_OPCODE_FUNC(BREAK)
EMIT_D3D_OPCODE_DS_FUNC(MOVA)
EMIT_D3D_OPCODE_D_FUNC(TEXKILL)
EMIT_D3D_OPCODE_DS_FUNC(TEXBEM)
EMIT_D3D_OPCODE_DS_FUNC(TEXBEML)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX)
EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC)
EMIT_D3D_OPCODE_DS_FUNC(EXPP)
EMIT_D3D_OPCODE_DS_FUNC(LOGP)
EMIT_D3D_OPCODE_DSSS_FUNC(CND)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB)
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH)
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3)
EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH)
EMIT_D3D_OPCODE_DSSS_FUNC(CMP)
EMIT_D3D_OPCODE_DSS_FUNC(BEM)
EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD)
EMIT_D3D_OPCODE_DS_FUNC(DSX)
EMIT_D3D_OPCODE_DS_FUNC(DSY)
EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD)
EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL)
EMIT_D3D_OPCODE_S_FUNC(BREAKP)
1412
1413 // special cases for comparison opcodes...
1414 static const char *get_D3D_comparison_string(Context *ctx)
1415 {
1416 static const char *comps[] = {
1417 "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le"
1418 };
1419
1420 if (ctx->instruction_controls >= STATICARRAYLEN(comps))
1421 {
1422 fail(ctx, "unknown comparison control");
1423 return "";
1424 } // if
1425
1426 return comps[ctx->instruction_controls];
1427 } // get_D3D_comparison_string
1428
// Emit "break<comparison>" with two source arguments; the comparison suffix
//  comes from get_D3D_comparison_string().
static void emit_D3D_BREAKC(Context *ctx)
{
    char opcode[16];
    const char *comparison = get_D3D_comparison_string(ctx);

    snprintf(opcode, sizeof (opcode), "break%s", comparison);
    emit_D3D_opcode_ss(ctx, opcode);
} // emit_D3D_BREAKC
1435
// Emit "if<comparison>" with two source arguments; the comparison suffix
//  comes from get_D3D_comparison_string().
static void emit_D3D_IFC(Context *ctx)
{
    char opcode[16];
    const char *comparison = get_D3D_comparison_string(ctx);

    snprintf(opcode, sizeof (opcode), "if%s", comparison);
    emit_D3D_opcode_ss(ctx, opcode);
} // emit_D3D_IFC
1442
// Emit "setp<comparison>" with a destination and two source arguments; the
//  comparison suffix comes from get_D3D_comparison_string().
static void emit_D3D_SETP(Context *ctx)
{
    char opcode[16];
    const char *comparison = get_D3D_comparison_string(ctx);

    snprintf(opcode, sizeof (opcode), "setp%s", comparison);
    emit_D3D_opcode_dss(ctx, opcode);
} // emit_D3D_SETP
1449
// Emit a "def" line: the destination register followed by the four float
//  constants stored in the first four entries of ctx->dwords.
static void emit_D3D_DEF(Context *ctx)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    // Copy the raw bits out instead of reading ctx->dwords through a
    //  (const float *) cast: accessing the stored objects through an
    //  incompatible pointer type violates C's aliasing rules. The same
    //  sixteen bytes are read either way.
    // !!! FIXME: could be int?
    float val[4];
    memcpy(val, ctx->dwords, sizeof (val));

    char val0[32];
    char val1[32];
    char val2[32];
    char val3[32];
    floatstr(ctx, val0, sizeof (val0), val[0], 0);
    floatstr(ctx, val1, sizeof (val1), val[1], 0);
    floatstr(ctx, val2, sizeof (val2), val[2], 0);
    floatstr(ctx, val3, sizeof (val3), val[3], 0);
    output_line(ctx, "def%s, %s, %s, %s, %s", dst, val0, val1, val2, val3);
} // emit_D3D_DEF
1465
// Emit a "defi" line: the destination register followed by the first four
//  entries of ctx->dwords, printed as signed integers.
static void emit_D3D_DEFI(Context *ctx)
{
    char dest_text[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));

    const int32 *ivals = (const int32 *) ctx->dwords;
    const int x = (int) ivals[0];
    const int y = (int) ivals[1];
    const int z = (int) ivals[2];
    const int w = (int) ivals[3];
    output_line(ctx, "defi%s, %d, %d, %d, %d", dest_text, x, y, z, w);
} // emit_D3D_DEFI
1474
// Emit a "defb" line: the destination register followed by "true" when
//  ctx->dwords[0] is non-zero and "false" otherwise.
static void emit_D3D_DEFB(Context *ctx)
{
    char dest_text[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));

    const char *literal = "false";
    if (ctx->dwords[0])
        literal = "true";

    output_line(ctx, "defb%s, %s", dest_text, literal);
} // emit_D3D_DEFB
1481
1482
// Emit a "dcl" line for the register in ctx->dest_arg:
//  "dcl<usage><index><dest>".
//  - sampler registers: usage is the texture type held in ctx->dwords[0].
//  - misc-type registers: no usage suffix at all.
//  - everything else: usage is looked up in usagestrs[] by ctx->dwords[0],
//    and ctx->dwords[1] is appended as an index when it is non-zero.
// An unknown sampler texture type or misc register calls fail() and emits
//  nothing.
static void emit_D3D_DCL(Context *ctx)
{
    char dest_text[64];
    make_D3D_destarg_string(ctx, dest_text, sizeof (dest_text));

    const DestArgInfo *dest = &ctx->dest_arg;
    const char *usage_text = "";
    char index_text[16] = { '\0' };

    if (dest->regtype == REG_TYPE_SAMPLER)
    {
        const TextureType ttype = (TextureType) ctx->dwords[0];
        if (ttype == TEXTURE_TYPE_2D)
            usage_text = "_2d";
        else if (ttype == TEXTURE_TYPE_CUBE)
            usage_text = "_cube";
        else if (ttype == TEXTURE_TYPE_VOLUME)
            usage_text = "_volume";
        else
        {
            fail(ctx, "unknown sampler texture type");
            return;
        } // else
    } // if

    else if (dest->regtype == REG_TYPE_MISCTYPE)
    {
        const MiscTypeType misc = (MiscTypeType) dest->regnum;
        if ((misc != MISCTYPE_TYPE_POSITION) && (misc != MISCTYPE_TYPE_FACE))
        {
            fail(ctx, "unknown misc register type");
            return;
        } // if

        // usage_text stays empty: just become "dcl vFace" or whatever.
    } // else if

    else
    {
        const uint32 usage_id = ctx->dwords[0];
        const uint32 usage_index = ctx->dwords[1];
        // NOTE(review): usage_id is not range-checked here before indexing
        //  usagestrs[]; presumably it was validated earlier -- confirm.
        usage_text = usagestrs[usage_id];
        if (usage_index != 0)
        {
            snprintf(index_text, sizeof (index_text), "%u",
                     (uint) usage_index);
        } // if
    } // else

    output_line(ctx, "dcl%s%s%s", usage_text, index_text, dest_text);
} // emit_D3D_DCL
1525
1526
// Emit the texture-coordinate opcode. Its spelling and argument list depend
//  on the shader version: "texcoord" with only a destination before 1.4,
//  "texcrd" with a destination and one source from 1.4 on.
static void emit_D3D_TEXCRD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_d(ctx, "texcoord");
        return;
    } // if

    emit_D3D_opcode_ds(ctx, "texcrd");
} // emit_D3D_TEXCRD
1535
// Emit the texture-load opcode. Its spelling and argument list depend on
//  the shader version:
//  - 2.0 and up: "texld", "texldp" or "texldb" (chosen by
//    ctx->instruction_controls) with a destination and two sources.
//  - 1.4: "texld" with a destination and one source.
//  - below 1.4: "tex" with only a destination.
static void emit_D3D_TEXLD(Context *ctx)
{
    // this opcode looks and acts differently depending on the shader model.
    if (shader_version_atleast(ctx, 2, 0))
    {
        if (ctx->instruction_controls == CONTROL_TEXLD)
           emit_D3D_opcode_dss(ctx, "texld");
        else if (ctx->instruction_controls == CONTROL_TEXLDP)
           emit_D3D_opcode_dss(ctx, "texldp");
        else if (ctx->instruction_controls == CONTROL_TEXLDB)
           emit_D3D_opcode_dss(ctx, "texldb");
        else
        {
            // Report this instead of silently dropping the instruction
            //  from the output.
            fail(ctx, "unknown texld instruction control");
        } // else
    } // if

    else if (shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_ds(ctx, "texld");
    } // else if

    else
    {
        emit_D3D_opcode_d(ctx, "tex");
    } // else
} // emit_D3D_TEXLD
1559
// Emit "sincos". From shader version 3.0 on it takes a destination and one
//  source; earlier versions carry two extra source arguments.
static void emit_D3D_SINCOS(Context *ctx)
{
    if (shader_version_atleast(ctx, 3, 0))
    {
        emit_D3D_opcode_ds(ctx, "sincos");
        return;
    } // if

    // this opcode needs extra registers for sm2 and lower.
    emit_D3D_opcode_dsss(ctx, "sincos");
} // emit_D3D_SINCOS
1568
1569
// The D3D emitter-generating helper macros are no longer needed past this
//  point; remove them.
#undef EMIT_D3D_OPCODE_FUNC
#undef EMIT_D3D_OPCODE_D_FUNC
#undef EMIT_D3D_OPCODE_S_FUNC
#undef EMIT_D3D_OPCODE_SS_FUNC
#undef EMIT_D3D_OPCODE_DS_FUNC
#undef EMIT_D3D_OPCODE_DSS_FUNC
#undef EMIT_D3D_OPCODE_DSSS_FUNC
#undef EMIT_D3D_OPCODE_DSSSS_FUNC
1578
1579 #endif // SUPPORT_PROFILE_D3D
1580
1581
// Bytecode profile. When it is compiled out, PROFILE_EMITTER_BYTECODE(op)
//  expands to nothing; otherwise it expands to "emit_BYTECODE_<op>,"
//  (presumably an entry in a table of emitters -- the table is not in this
//  part of the file) and AT_LEAST_ONE_PROFILE is (re)defined to 1.
#if !SUPPORT_PROFILE_BYTECODE
#define PROFILE_EMITTER_BYTECODE(op)
#else
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
#define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op,
1588
// Start of output for the bytecode profile: mark the context as ignoring
//  the CTAB, then, if the mainline output can be selected, append the whole
//  token stream (ctx->tokencount uint32 tokens) to it verbatim.
// (profilestr) is not used here.
static void emit_BYTECODE_start(Context *ctx, const char *profilestr)
{
    ctx->ignores_ctab = 1;

    if (!set_output(ctx, &ctx->mainline))
        return;

    // just copy the whole token stream and make all other emitters no-ops.
    const size_t bytes = ctx->tokencount * sizeof (uint32);
    buffer_append(ctx->mainline, (const char *) ctx->tokens, bytes);
} // emit_BYTECODE_start
1600
// Every remaining bytecode-profile hook has an empty body:
//  emit_BYTECODE_start() has already copied the token stream, so there is
//  nothing further to write.
static void emit_BYTECODE_end(Context *ctx) {}
static void emit_BYTECODE_phase(Context *ctx) {}
static void emit_BYTECODE_finalize(Context *ctx) {}
static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {}
static void emit_BYTECODE_array(Context *ctx, VariableList *var) {}
static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {}
static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c,
                                         int base, int size) {}
static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n,
                                  const VariableList *var) {}
static void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n,
                                       MOJOSHADER_usage u, int i, int w,
                                       int f) {}
1614
// Name register (rt)/(regnum) for the bytecode profile: the D3D register
//  type string followed by the register number string. Returns the copy
//  made by StrDup().
static const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum)
{
    char number_text[16];
    char scratch[64];
    const char *type_text;

    type_text = get_D3D_register_string(ctx, rt, regnum, number_text,
                                        sizeof (number_text));
    snprintf(scratch, sizeof (scratch), "%s%s", type_text, number_text);
    return StrDup(ctx, scratch);
} // get_BYTECODE_varname
1624
// Produce the name "c_array_<base>_<size>" for a constant array and return
//  the copy made by StrDup().
static const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size)
{
    char scratch[64];

    snprintf(scratch, sizeof (scratch), "c_array_%d_%d", base, size);

    return StrDup(ctx, scratch);
} // get_BYTECODE_const_array_varname
1631
// Defines "static void emit_BYTECODE_<op>(Context *ctx)" with an empty body.
#define EMIT_BYTECODE_OPCODE_FUNC(op) \
    static void emit_BYTECODE_##op(Context *ctx) {}

// One empty emitter per opcode.
EMIT_BYTECODE_OPCODE_FUNC(RESERVED)
EMIT_BYTECODE_OPCODE_FUNC(NOP)
EMIT_BYTECODE_OPCODE_FUNC(MOV)
EMIT_BYTECODE_OPCODE_FUNC(ADD)
EMIT_BYTECODE_OPCODE_FUNC(SUB)
EMIT_BYTECODE_OPCODE_FUNC(MAD)
EMIT_BYTECODE_OPCODE_FUNC(MUL)
EMIT_BYTECODE_OPCODE_FUNC(RCP)
EMIT_BYTECODE_OPCODE_FUNC(RSQ)
EMIT_BYTECODE_OPCODE_FUNC(DP3)
EMIT_BYTECODE_OPCODE_FUNC(DP4)
EMIT_BYTECODE_OPCODE_FUNC(MIN)
EMIT_BYTECODE_OPCODE_FUNC(MAX)
EMIT_BYTECODE_OPCODE_FUNC(SLT)
EMIT_BYTECODE_OPCODE_FUNC(SGE)
EMIT_BYTECODE_OPCODE_FUNC(EXP)
EMIT_BYTECODE_OPCODE_FUNC(LOG)
EMIT_BYTECODE_OPCODE_FUNC(LIT)
EMIT_BYTECODE_OPCODE_FUNC(DST)
EMIT_BYTECODE_OPCODE_FUNC(LRP)
EMIT_BYTECODE_OPCODE_FUNC(FRC)
EMIT_BYTECODE_OPCODE_FUNC(M4X4)
EMIT_BYTECODE_OPCODE_FUNC(M4X3)
EMIT_BYTECODE_OPCODE_FUNC(M3X4)
EMIT_BYTECODE_OPCODE_FUNC(M3X3)
EMIT_BYTECODE_OPCODE_FUNC(M3X2)
EMIT_BYTECODE_OPCODE_FUNC(CALL)
EMIT_BYTECODE_OPCODE_FUNC(CALLNZ)
EMIT_BYTECODE_OPCODE_FUNC(LOOP)
EMIT_BYTECODE_OPCODE_FUNC(RET)
EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP)
EMIT_BYTECODE_OPCODE_FUNC(LABEL)
EMIT_BYTECODE_OPCODE_FUNC(POW)
EMIT_BYTECODE_OPCODE_FUNC(CRS)
EMIT_BYTECODE_OPCODE_FUNC(SGN)
EMIT_BYTECODE_OPCODE_FUNC(ABS)
EMIT_BYTECODE_OPCODE_FUNC(NRM)
EMIT_BYTECODE_OPCODE_FUNC(SINCOS)
EMIT_BYTECODE_OPCODE_FUNC(REP)
EMIT_BYTECODE_OPCODE_FUNC(ENDREP)
EMIT_BYTECODE_OPCODE_FUNC(IF)
EMIT_BYTECODE_OPCODE_FUNC(ELSE)
EMIT_BYTECODE_OPCODE_FUNC(ENDIF)
EMIT_BYTECODE_OPCODE_FUNC(BREAK)
EMIT_BYTECODE_OPCODE_FUNC(MOVA)
EMIT_BYTECODE_OPCODE_FUNC(TEXKILL)
EMIT_BYTECODE_OPCODE_FUNC(TEXBEM)
EMIT_BYTECODE_OPCODE_FUNC(TEXBEML)
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR)
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC)
EMIT_BYTECODE_OPCODE_FUNC(EXPP)
EMIT_BYTECODE_OPCODE_FUNC(LOGP)
EMIT_BYTECODE_OPCODE_FUNC(CND)
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB)
EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH)
EMIT_BYTECODE_OPCODE_FUNC(TEXDP3)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3)
EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH)
EMIT_BYTECODE_OPCODE_FUNC(CMP)
EMIT_BYTECODE_OPCODE_FUNC(BEM)
EMIT_BYTECODE_OPCODE_FUNC(DP2ADD)
EMIT_BYTECODE_OPCODE_FUNC(DSX)
EMIT_BYTECODE_OPCODE_FUNC(DSY)
EMIT_BYTECODE_OPCODE_FUNC(TEXLDD)
EMIT_BYTECODE_OPCODE_FUNC(TEXLDL)
EMIT_BYTECODE_OPCODE_FUNC(BREAKP)
EMIT_BYTECODE_OPCODE_FUNC(BREAKC)
EMIT_BYTECODE_OPCODE_FUNC(IFC)
EMIT_BYTECODE_OPCODE_FUNC(SETP)
EMIT_BYTECODE_OPCODE_FUNC(DEF)
EMIT_BYTECODE_OPCODE_FUNC(DEFI)
EMIT_BYTECODE_OPCODE_FUNC(DEFB)
EMIT_BYTECODE_OPCODE_FUNC(DCL)
EMIT_BYTECODE_OPCODE_FUNC(TEXCRD)
EMIT_BYTECODE_OPCODE_FUNC(TEXLD)

// The generator macro is not needed past this point.
#undef EMIT_BYTECODE_OPCODE_FUNC
1719
1720 #endif // SUPPORT_PROFILE_BYTECODE
1721
1722
// GLSL profile. When it is compiled out, PROFILE_EMITTER_GLSL(op) expands
//  to nothing; otherwise it expands to "emit_GLSL_<op>," (presumably an
//  entry in a table of emitters -- the table is not in this part of the
//  file) and AT_LEAST_ONE_PROFILE is (re)defined to 1.
#if !SUPPORT_PROFILE_GLSL
#define PROFILE_EMITTER_GLSL(op)
#else
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
#define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op,

// Defines emit_GLSL_<op>() as a stub that only reports, via fail(), that
//  the opcode is "unimplemented in glsl profile".
#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \
    static void emit_GLSL_##op(Context *ctx) { \
        fail(ctx, #op " unimplemented in glsl profile"); \
    }
1734
1735 static inline const char *get_GLSL_register_string(Context *ctx,
1736 const RegisterType regtype, const int regnum,
1737 char *regnum_str, const size_t regnum_size)
1738 {
1739 // turns out these are identical at the moment.
1740 return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
1741 } // get_GLSL_register_string
1742
1743 static const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype)
1744 {
1745 switch (rtype)
1746 {
1747 case REG_TYPE_CONST: return "vec4";
1748 case REG_TYPE_CONSTINT: return "ivec4";
1749 case REG_TYPE_CONSTBOOL: return "bool";
1750 default: fail(ctx, "BUG: used a uniform we don't know how to define.");
1751 } // switch
1752
1753 return NULL;
1754 } // get_GLSL_uniform_type
1755
1756 static const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt,
1757 int regnum, char *buf,
1758 const size_t len)
1759 {
1760 char regnum_str[16];
1761 const char *regtype_str = get_GLSL_register_string(ctx, rt, regnum,
1762 regnum_str, sizeof (regnum_str));
1763 snprintf(buf,len,"%s_%s%s", ctx->shader_type_str, regtype_str, regnum_str);
1764 return buf;
1765 } // get_GLSL_varname_in_buf
1766
1767
1768 static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum)
1769 {
1770 char buf[64];
1771 get_GLSL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
1772 return StrDup(ctx, buf);
1773 } // get_GLSL_varname
1774
1775
1776 static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx,
1777 const int base, const int size,
1778 char *buf, const size_t buflen)
1779 {
1780 const char *type = ctx->shader_type_str;
1781 snprintf(buf, buflen, "%s_const_array_%d_%d", type, base, size);
1782 return buf;
1783 } // get_GLSL_const_array_varname_in_buf
1784
1785 static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size)
1786 {
1787 char buf[64];
1788 get_GLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
1789 return StrDup(ctx, buf);
1790 } // get_GLSL_const_array_varname
1791
1792
1793 static inline const char *get_GLSL_input_array_varname(Context *ctx,
1794 char *buf, const size_t buflen)
1795 {
1796 snprintf(buf, buflen, "%s", "vertex_input_array");
1797 return buf;
1798 } // get_GLSL_input_array_varname
1799
1800
1801 static const char *get_GLSL_uniform_array_varname(Context *ctx,
1802 const RegisterType regtype,
1803 char *buf, const size_t len)
1804 {
1805 const char *shadertype = ctx->shader_type_str;
1806 const char *type = get_GLSL_uniform_type(ctx, regtype);
1807 snprintf(buf, len, "%s_uniforms_%s", shadertype, type);
1808 return buf;
1809 } // get_GLSL_uniform_array_varname
1810
1811 static const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len)
1812 {
1813 const DestArgInfo *arg = &ctx->dest_arg;
1814 return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
1815 } // get_GLSL_destarg_varname
1816
1817 static const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx,
1818 char *buf, size_t len)
1819 {
1820 if (idx >= STATICARRAYLEN(ctx->source_args))
1821 {
1822 fail(ctx, "Too many source args");
1823 *buf = '\0';
1824 return buf;
1825 } // if
1826
1827 const SourceArgInfo *arg = &ctx->source_args[idx];
1828 return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
1829 } // get_GLSL_srcarg_varname
1830
1831
1832 static const char *make_GLSL_destarg_assign(Context *, char *, const size_t,
1833 const char *, ...) ISPRINTF(4,5);
1834
1835 static const char *make_GLSL_destarg_assign(Context *ctx, char *buf,
1836 const size_t buflen,
1837 const char *fmt, ...)
1838 {
1839 int need_parens = 0;
1840 const DestArgInfo *arg = &ctx->dest_arg;
1841
1842 if (arg->writemask == 0)
1843 {
1844 *buf = '\0';
1845 return buf; // no writemask? It's a no-op.
1846 } // if
1847
1848 char clampbuf[32] = { '\0' };
1849 const char *clampleft = "";
1850 const char *clampright = "";
1851 if (arg->result_mod & MOD_SATURATE)
1852 {
1853 const int vecsize = vecsize_from_writemask(arg->writemask);
1854 clampleft = "clamp(";
1855 if (vecsize == 1)
1856 clampright = ", 0.0, 1.0)";
1857 else
1858 {
1859 snprintf(clampbuf, sizeof (clampbuf),
1860 ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize);
1861 clampright = clampbuf;
1862 } // else
1863 } // if
1864
1865 // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
1866
1867 // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
1868 assert((arg->result_mod & MOD_CENTROID) == 0);
1869
1870 if (ctx->predicated)
1871 {
1872 fail(ctx, "predicated destinations unsupported"); // !!! FIXME
1873 *buf = '\0';
1874 return buf;
1875 } // if
1876
1877 char operation[256];
1878 va_list ap;
1879 va_start(ap, fmt);
1880 const int len = vsnprintf(operation, sizeof (operation), fmt, ap);
1881 va_end(ap);
1882 if (len >= sizeof (operation))
1883 {
1884 fail(ctx, "operation string too large"); // I'm lazy. :P
1885 *buf = '\0';
1886 return buf;
1887 } // if
1888
1889 const char *result_shift_str = "";
1890 switch (arg->result_shift)
1891 {
1892 case 0x1: result_shift_str = " * 2.0"; break;
1893 case 0x2: result_shift_str = " * 4.0"; break;
1894 case 0x3: result_shift_str = " * 8.0"; break;
1895 case 0xD: result_shift_str = " / 8.0"; break;
1896 case 0xE: result_shift_str = " / 4.0"; break;
1897 case 0xF: result_shift_str = " / 2.0"; break;
1898 } // switch
1899 need_parens |= (result_shift_str[0] != '\0');
1900
1901 char regnum_str[16];
1902 const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype,
1903 arg->regnum, regnum_str,
1904 sizeof (regnum_str));
1905 char writemask_str[6];
1906 size_t i = 0;
1907 const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
1908 if (!scalar && !writemask_xyzw(arg->writemask))
1909 {
1910 writemask_str[i++] = '.';
1911 if (arg->writemask0) writemask_str[i++] = 'x';
1912 if (arg->writemask1) writemask_str[i++] = 'y';
1913 if (arg->writemask2) writemask_str[i++] = 'z';
1914 if (arg->writemask3) writemask_str[i++] = 'w';
1915 } // if
1916 writemask_str[i] = '\0';
1917 assert(i < sizeof (writemask_str));
1918
1919 const char *leftparen = (need_parens) ? "(" : "";
1920 const char *rightparen = (need_parens) ? ")" : "";
1921
1922 snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;",
1923 ctx->shader_type_str, regtype_str, regnum_str, writemask_str,
1924 clampleft, leftparen, operation, rightparen, result_shift_str,
1925 clampright);
1926 // !!! FIXME: make sure the scratch buffer was large enough.
1927 return buf;
1928 } // make_GLSL_destarg_assign
1929
1930
1931 static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize,
1932 const int swizzle, const int writemask)
1933 {
1934 size_t i = 0;
1935 if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
1936 {
1937 const int writemask0 = (writemask >> 0) & 0x1;
1938 const int writemask1 = (writemask >> 1) & 0x1;
1939 const int writemask2 = (writemask >> 2) & 0x1;
1940 const int writemask3 = (writemask >> 3) & 0x1;
1941
1942 const int swizzle_x = (swizzle >> 0) & 0x3;
1943 const int swizzle_y = (swizzle >> 2) & 0x3;
1944 const int swizzle_z = (swizzle >> 4) & 0x3;
1945 const int swizzle_w = (swizzle >> 6) & 0x3;
1946
1947 swiz_str[i++] = '.';
1948 if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x];
1949 if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y];
1950 if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z];
1951 if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w];
1952 } // if
1953 assert(i < strsize);
1954 swiz_str[i] = '\0';
1955 return swiz_str;
1956 } // make_GLSL_swizzle_string
1957
1958
1959 static const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx,
1960 const int writemask, char *buf,
1961 const size_t buflen)
1962 {
1963 *buf = '\0';
1964
1965 if (idx >= STATICARRAYLEN(ctx->source_args))
1966 {
1967 fail(ctx, "Too many source args");
1968 return buf;
1969 } // if
1970
1971 const SourceArgInfo *arg = &ctx->source_args[idx];
1972
1973 const char *premod_str = "";
1974 const char *postmod_str = "";
1975 switch (arg->src_mod)
1976 {
1977 case SRCMOD_NEGATE:
1978 premod_str = "-";
1979 break;
1980
1981 case SRCMOD_BIASNEGATE:
1982 premod_str = "-(";
1983 postmod_str = " - 0.5)";
1984 break;
1985
1986 case SRCMOD_BIAS:
1987 premod_str = "(";
1988 postmod_str = " - 0.5)";
1989 break;
1990
1991 case SRCMOD_SIGNNEGATE:
1992 premod_str = "-((";
1993 postmod_str = " - 0.5) * 2.0)";
1994 break;
1995
1996 case SRCMOD_SIGN:
1997 premod_str = "((";
1998 postmod_str = " - 0.5) * 2.0)";
1999 break;
2000
2001 case SRCMOD_COMPLEMENT:
2002 premod_str = "(1.0 - ";
2003 postmod_str = ")";
2004 break;
2005
2006 case SRCMOD_X2NEGATE:
2007 premod_str = "-(";
2008 postmod_str = " * 2.0)";
2009 break;
2010
2011 case SRCMOD_X2:
2012 premod_str = "(";
2013 postmod_str = " * 2.0)";
2014 break;
2015
2016 case SRCMOD_DZ:
2017 fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME
2018 postmod_str = "_dz";
2019 break;
2020
2021 case SRCMOD_DW:
2022 fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME
2023 postmod_str = "_dw";
2024 break;
2025
2026 case SRCMOD_ABSNEGATE:
2027 premod_str = "-abs(";
2028 postmod_str = ")";
2029 break;
2030
2031 case SRCMOD_ABS:
2032 premod_str = "abs(";
2033 postmod_str = ")";
2034 break;
2035
2036 case SRCMOD_NOT:
2037 premod_str = "!";
2038 break;
2039
2040 case SRCMOD_NONE:
2041 case SRCMOD_TOTAL:
2042 break; // stop compiler whining.
2043 } // switch
2044
2045 const char *regtype_str = NULL;
2046
2047 if (!arg->relative)
2048 {
2049 regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum,
2050 (char *) alloca(64), 64);
2051 } // if
2052
2053 const char *rel_lbracket = "";
2054 char rel_offset[32] = { '\0' };
2055 const char *rel_rbracket = "";
2056 char rel_swizzle[4] = { '\0' };
2057 const char *rel_regtype_str = "";
2058 if (arg->relative)
2059 {
2060 if (arg->regtype == REG_TYPE_INPUT)
2061 regtype_str=get_GLSL_input_array_varname(ctx,(char*)alloca(64),64);
2062 else
2063 {
2064 assert(arg->regtype == REG_TYPE_CONST);
2065 const int arrayidx = arg->relative_array->index;
2066 const int offset = arg->regnum - arrayidx;
2067 assert(offset >= 0);
2068 if (arg->relative_array->constant)
2069 {
2070 const int arraysize = arg->relative_array->count;
2071 regtype_str = get_GLSL_const_array_varname_in_buf(ctx,
2072 arrayidx, arraysize, (char *) alloca(64), 64);
2073 if (offset != 0)
2074 snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
2075 } // if
2076 else
2077 {
2078 regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype,
2079 (char *) alloca(64), 64);
2080 if (offset == 0)
2081 {
2082 snprintf(rel_offset, sizeof (rel_offset),
2083 "ARRAYBASE_%d + ", arrayidx);
2084 } // if
2085 else
2086 {
2087 snprintf(rel_offset, sizeof (rel_offset),
2088 "(ARRAYBASE_%d + %d) + ", arrayidx, offset);
2089 } // else
2090 } // else
2091 } // else
2092
2093 rel_lbracket = "[";
2094
2095 rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype,
2096 arg->relative_regnum,
2097 (char *) alloca(64), 64);
2098 rel_swizzle[0] = '.';
2099 rel_swizzle[1] = swizzle_channels[arg->relative_component];
2100 rel_swizzle[2] = '\0';
2101 rel_rbracket = "]";
2102 } // if
2103
2104 char swiz_str[6] = { '\0' };
2105 if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
2106 {
2107 make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
2108 arg->swizzle, writemask);
2109 } // if
2110
2111 if (regtype_str == NULL)
2112 {
2113 fail(ctx, "Unknown source register type.");
2114 return buf;
2115 } // if
2116
2117 snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s",
2118 premod_str, regtype_str, rel_lbracket, rel_offset,
2119 rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str,
2120 postmod_str);
2121 // !!! FIXME: make sure the scratch buffer was large enough.
2122 return buf;
2123 } // make_GLSL_srcarg_string
2124
2125 // generate some convenience functions.
2126 #define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \
2127 static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \
2128 const size_t idx, char *buf, \
2129 const size_t buflen) { \
2130 return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \
2131 }
2132 MAKE_GLSL_SRCARG_STRING_(x, (1 << 0))
2133 MAKE_GLSL_SRCARG_STRING_(y, (1 << 1))
2134 MAKE_GLSL_SRCARG_STRING_(z, (1 << 2))
2135 MAKE_GLSL_SRCARG_STRING_(w, (1 << 3))
2136 MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0))
2137 MAKE_GLSL_SRCARG_STRING_(full, 0xF)
2138 MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask)
2139 MAKE_GLSL_SRCARG_STRING_(vec3, 0x7)
2140 MAKE_GLSL_SRCARG_STRING_(vec2, 0x3)
2141 #undef MAKE_GLSL_SRCARG_STRING_
2142
2143 // special cases for comparison opcodes...
2144
2145 static const char *get_GLSL_comparison_string_scalar(Context *ctx)
2146 {
2147 static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" };
2148 if (ctx->instruction_controls >= STATICARRAYLEN(comps))
2149 {
2150 fail(ctx, "unknown comparison control");
2151 return "";
2152 } // if
2153
2154 return comps[ctx->instruction_controls];
2155 } // get_GLSL_comparison_string_scalar
2156
2157 static const char *get_GLSL_comparison_string_vector(Context *ctx)
2158 {
2159 static const char *comps[] = {
2160 "", "greaterThan", "equal", "greaterThanEqual", "lessThan",
2161 "notEqual", "lessThanEqual"
2162 };
2163
2164 if (ctx->instruction_controls >= STATICARRAYLEN(comps))
2165 {
2166 fail(ctx, "unknown comparison control");
2167 return "";
2168 } // if
2169
2170 return comps[ctx->instruction_controls];
2171 } // get_GLSL_comparison_string_vector
2172
2173
2174 static void emit_GLSL_start(Context *ctx, const char *profilestr)
2175 {
2176 if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
2177 {
2178 failf(ctx, "Shader type %u unsupported in this profile.",
2179 (uint) ctx->shader_type);
2180 return;
2181 } // if
2182
2183 else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0)
2184 {
2185 // No gl_FragData[] before GLSL 1.10, so we have to force the version.
2186 push_output(ctx, &ctx->preflight);
2187 output_line(ctx, "#version 110");
2188 pop_output(ctx);
2189 } // else if
2190
2191 #if SUPPORT_PROFILE_GLSL120
2192 else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0)
2193 {
2194 ctx->profile_supports_glsl120 = 1;
2195 push_output(ctx, &ctx->preflight);
2196 output_line(ctx, "#version 120");
2197 pop_output(ctx);
2198 } // else if
2199 #endif
2200
2201 else
2202 {
2203 failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
2204 return;
2205 } // else
2206
2207 push_output(ctx, &ctx->mainline_intro);
2208 output_line(ctx, "void main()");
2209 output_line(ctx, "{");
2210 pop_output(ctx);
2211
2212 set_output(ctx, &ctx->mainline);
2213 ctx->indent++;
2214 } // emit_GLSL_start
2215
2216 static void emit_GLSL_RET(Context *ctx);
2217 static void emit_GLSL_end(Context *ctx)
2218 {
2219 // ps_1_* writes color to r0 instead oC0. We move it to the right place.
2220 // We don't have to worry about a RET opcode messing this up, since
2221 // RET isn't available before ps_2_0.
2222 if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
2223 {
2224 const char *shstr = ctx->shader_type_str;
2225 set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
2226 output_line(ctx, "%s_oC0 = %s_r0;", shstr, shstr);
2227 } // if
2228
2229 // force a RET opcode if we're at the end of the stream without one.
2230 if (ctx->previous_opcode != OPCODE_RET)
2231 emit_GLSL_RET(ctx);
2232 } // emit_GLSL_end
2233
2234 static void emit_GLSL_phase(Context *ctx)
2235 {
2236 // no-op in GLSL.
2237 } // emit_GLSL_phase
2238
2239 static void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype,
2240 const int size)
2241 {
2242 if (size > 0)
2243 {
2244 char buf[64];
2245 get_GLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf));
2246 output_line(ctx, "uniform vec4 %s[%d];", buf, size);
2247 } // if
2248 } // output_GLSL_uniform_array
2249
2250 static void emit_GLSL_finalize(Context *ctx)
2251 {
2252 // throw some blank lines around to make source more readable.
2253 push_output(ctx, &ctx->globals);
2254 output_blank_line(ctx);
2255 pop_output(ctx);
2256
2257 // If we had a relative addressing of REG_TYPE_INPUT, we need to build
2258 // an array for it at the start of main(). GLSL doesn't let you specify
2259 // arrays of attributes.
2260 //vec4 blah_array[BIGGEST_ARRAY];
2261 if (ctx->have_relative_input_registers) // !!! FIXME
2262 fail(ctx, "Relative addressing of input registers not supported.");
2263
2264 push_output(ctx, &ctx->preflight);
2265 output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count);
2266 output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count);
2267 output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count);
2268 pop_output(ctx);
2269 } // emit_GLSL_finalize
2270
2271 static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum)
2272 {
2273 char varname[64];
2274 get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
2275
2276 push_output(ctx, &ctx->globals);
2277 switch (regtype)
2278 {
2279 case REG_TYPE_ADDRESS:
2280 if (shader_is_vertex(ctx))
2281 output_line(ctx, "ivec4 %s;", varname);
2282 else if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE.
2283 {
2284 // We have to map texture registers to temps for ps_1_1, since
2285 // they work like temps, initialize with tex coords, and the
2286 // ps_1_1 TEX opcode expects to overwrite it.
2287 if (!shader_version_atleast(ctx, 1, 4))
2288 {
2289 output_line(ctx, "vec4 %s = gl_TexCoord[%d];",
2290 varname, regnum);
2291 } // if
2292 } // else if
2293 break;
2294 case REG_TYPE_PREDICATE:
2295 output_line(ctx, "bvec4 %s;", varname);
2296 break;
2297 case REG_TYPE_TEMP:
2298 output_line(ctx, "vec4 %s;", varname);
2299 break;
2300 case REG_TYPE_LOOP:
2301 break; // no-op. We declare these in for loops at the moment.
2302 case REG_TYPE_LABEL:
2303 break; // no-op. If we see it here, it means we optimized it out.
2304 default:
2305 fail(ctx, "BUG: we used a register we don't know how to define.");
2306 break;
2307 } // switch
2308 pop_output(ctx);
2309 } // emit_GLSL_global
2310
2311 static void emit_GLSL_array(Context *ctx, VariableList *var)
2312 {
2313 // All uniforms (except constant arrays, which only get pushed once at
2314 // compile time) are now packed into a single array, so we can batch
2315 // the uniform transfers. So this is doesn't actually define an array
2316 // here; the one, big array is emitted during finalization instead.
2317 // However, we need to #define the offset into the one, big array here,
2318 // and let dereferences use that #define.
2319 const int base = var->index;
2320 const int glslbase = ctx->uniform_float4_count;
2321 push_output(ctx, &ctx->globals);
2322 output_line(ctx, "#define ARRAYBASE_%d %d", base, glslbase);
2323 pop_output(ctx);
2324 var->emit_position = glslbase;
2325 } // emit_GLSL_array
2326
2327 static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist,
2328 int base, int size)
2329 {
2330 char varname[64];
2331 get_GLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname));
2332
2333 #if 0
2334 // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120.
2335 // !!! FIXME: (the 1.20 spec says it should work, though, I think...)
2336 if (support_glsl120(ctx))
2337 {
2338 // GLSL 1.20 can do constant arrays.
2339 const char *cstr = NULL;
2340 push_output(ctx, &ctx->globals);
2341 output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size);
2342 ctx->indent++;
2343
2344 int i;
2345 for (i = 0; i < size; i++)
2346 {
2347 while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
2348 clist = clist->next;
2349 assert(clist->constant.index == (base + i));
2350
2351 char val0[32];
2352 char val1[32];
2353 char val2[32];
2354 char val3[32];
2355 floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
2356 floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
2357 floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
2358 floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
2359
2360 output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
2361 (i < (size-1)) ? "," : "");
2362
2363 clist = clist->next;
2364 } // for
2365
2366 ctx->indent--;
2367 output_line(ctx, ");");
2368 pop_output(ctx);
2369 } // if
2370
2371 else
2372 #endif
2373 {
2374 // stock GLSL 1.0 can't do constant arrays, so make a uniform array
2375 // and have the OpenGL glue assign it at link time. Lame!
2376 push_output(ctx, &ctx->globals);
2377 output_line(ctx, "uniform vec4 %s[%d];", varname, size);
2378 pop_output(ctx);
2379 } // else
2380 } // emit_GLSL_const_array
2381
2382 static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum,
2383 const VariableList *var)
2384 {
2385 // Now that we're pushing all the uniforms as one big array, pack these
2386 // down, so if we only use register c439, it'll actually map to
2387 // glsl_uniforms_vec4[0]. As we push one big array, this will prevent
2388 // uploading unused data.
2389
2390 char varname[64];
2391 char name[64];
2392 int index = 0;
2393
2394 get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
2395
2396 push_output(ctx, &ctx->globals);
2397
2398 if (var == NULL)
2399 {
2400 get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
2401
2402 if (regtype == REG_TYPE_CONST)
2403 index = ctx->uniform_float4_count;
2404 else if (regtype == REG_TYPE_CONSTINT)
2405 index = ctx->uniform_int4_count;
2406 else if (regtype == REG_TYPE_CONSTBOOL)
2407 index = ctx->uniform_bool_count;
2408 else // get_GLSL_uniform_array_varname() would have called fail().
2409 assert(isfail(ctx));
2410
2411 output_line(ctx, "#define %s %s[%d]", varname, name, index);
2412 } // if
2413
2414 else
2415 {
2416 const int arraybase = var->index;
2417 if (var->constant)
2418 {
2419 get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count,
2420 name, sizeof (name));
2421 index = (regnum - arraybase);
2422 } // if
2423 else
2424 {
2425 assert(var->emit_position != -1);
2426 get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
2427 index = (regnum - arraybase) + var->emit_position;
2428 } // else
2429
2430 output_line(ctx, "#define %s %s[%d]", varname, name, index);
2431 } // else
2432
2433 pop_output(ctx);
2434 } // emit_GLSL_uniform
2435
2436 static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
2437 {
2438 const char *type = "";
2439 switch (ttype)
2440 {
2441 case TEXTURE_TYPE_2D: type = "sampler2D"; break;
2442 case TEXTURE_TYPE_CUBE: type = "samplerCube"; break;
2443 case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break;
2444 default: fail(ctx, "BUG: used a sampler we don't know how to define.");
2445 } // switch
2446
2447 char var[64];
2448 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var));
2449
2450 push_output(ctx, &ctx->globals);
2451 output_line(ctx, "uniform %s %s;", type, var);
2452 if (tb) // This sampler used a ps_1_1 TEXBEM opcode?
2453 {
2454 char name[64];
2455 const int index = ctx->uniform_float4_count;
2456 ctx->uniform_float4_count += 2;
2457 get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name));
2458 output_line(ctx, "#define %s_texbem %s[%d]", var, name, index);
2459 output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1);
2460 } // if
2461 pop_output(ctx);
2462 } // emit_GLSL_sampler
2463
2464 static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
2465 MOJOSHADER_usage usage, int index, int wmask,
2466 int flags)
2467 {
2468 // !!! FIXME: this function doesn't deal with write masks at all yet!
2469 const char *usage_str = NULL;
2470 const char *arrayleft = "";
2471 const char *arrayright = "";
2472 char index_str[16] = { '\0' };
2473 char var[64];
2474
2475 get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var));
2476
2477 //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed?
2478
2479 if (index != 0) // !!! FIXME: a lot of these MUST be zero.
2480 snprintf(index_str, sizeof (index_str), "%u", (uint) index);
2481
2482 if (shader_is_vertex(ctx))
2483 {
2484 // pre-vs3 output registers.
2485 // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
2486 // output registers.
2487 if (!shader_version_atleast(ctx, 3, 0))
2488 {
2489 if (regtype == REG_TYPE_RASTOUT)
2490 {
2491 regtype = REG_TYPE_OUTPUT;
2492 index = regnum;
2493 switch ((const RastOutType) regnum)
2494 {
2495 case RASTOUT_TYPE_POSITION:
2496 usage = MOJOSHADER_USAGE_POSITION;
2497 break;
2498 case RASTOUT_TYPE_FOG:
2499 usage = MOJOSHADER_USAGE_FOG;
2500 break;
2501 case RASTOUT_TYPE_POINT_SIZE:
2502 usage = MOJOSHADER_USAGE_POINTSIZE;
2503 break;
2504 } // switch
2505 } // if
2506
2507 else if (regtype == REG_TYPE_ATTROUT)
2508 {
2509 regtype = REG_TYPE_OUTPUT;
2510 usage = MOJOSHADER_USAGE_COLOR;
2511 index = regnum;
2512 } // else if
2513
2514 else if (regtype == REG_TYPE_TEXCRDOUT)
2515 {
2516 regtype = REG_TYPE_OUTPUT;
2517 usage = MOJOSHADER_USAGE_TEXCOORD;
2518 index = regnum;
2519 } // else if
2520 } // if
2521
2522 // to avoid limitations of various GL entry points for input
2523 // attributes (glSecondaryColorPointer() can only take 3 component
2524 // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
2525 // issues), we set up all inputs as generic vertex attributes, so we
2526 // can pass data in just about any form, and ignore the built-in GLSL
2527 // attributes like gl_SecondaryColor. Output needs to use the the
2528 // built-ins, though, but we don't have to worry about the GL entry
2529 // point limitations there.
2530
2531 if (regtype == REG_TYPE_INPUT)
2532 {
2533 push_output(ctx, &ctx->globals);
2534 output_line(ctx, "attribute vec4 %s;", var);
2535 pop_output(ctx);
2536 } // if
2537
2538 else if (regtype == REG_TYPE_OUTPUT)
2539 {
2540 switch (usage)
2541 {
2542 case MOJOSHADER_USAGE_POSITION:
2543 usage_str = "gl_Position";
2544 break;
2545 case MOJOSHADER_USAGE_POINTSIZE:
2546 usage_str = "gl_PointSize";
2547 break;
2548 case MOJOSHADER_USAGE_COLOR:
2549 index_str[0] = '\0'; // no explicit number.
2550 if (index == 0)
2551 usage_str = "gl_FrontColor";
2552 else if (index == 1)
2553 usage_str = "gl_FrontSecondaryColor";
2554 break;
2555 case MOJOSHADER_USAGE_FOG:
2556 usage_str = "gl_FogFragCoord";
2557 break;
2558 case MOJOSHADER_USAGE_TEXCOORD:
2559 snprintf(index_str, sizeof (index_str), "%u", (uint) index);
2560 usage_str = "gl_TexCoord";
2561 arrayleft = "[";
2562 arrayright = "]";
2563 break;
2564 default:
2565 // !!! FIXME: we need to deal with some more built-in varyings here.
2566 break;
2567 } // switch
2568
2569 // !!! FIXME: the #define is a little hacky, but it means we don't
2570 // !!! FIXME: have to track these separately if this works.
2571 push_output(ctx, &ctx->globals);
2572 // no mapping to built-in var? Just make it a regular global, pray.
2573 if (usage_str == NULL)
2574 output_line(ctx, "vec4 %s;", var);
2575 else
2576 {
2577 output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
2578 arrayleft, index_str, arrayright);
2579 } // else
2580 pop_output(ctx);
2581 } // else if
2582
2583 else
2584 {
2585 fail(ctx, "unknown vertex shader attribute register");
2586 } // else
2587 } // if
2588
2589 else if (shader_is_pixel(ctx))
2590 {
2591 // samplers DCLs get handled in emit_GLSL_sampler().
2592
2593 if (flags & MOD_CENTROID) // !!! FIXME
2594 {
2595 failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
2596 return;
2597 } // if
2598
2599 if (regtype == REG_TYPE_COLOROUT)
2600 {
2601 if (!ctx->have_multi_color_outputs)
2602 usage_str = "gl_FragColor"; // maybe faster?
2603 else
2604 {
2605 snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
2606 usage_str = "gl_FragData";
2607 arrayleft = "[";
2608 arrayright = "]";
2609 } // else
2610 } // if
2611
2612 else if (regtype == REG_TYPE_DEPTHOUT)
2613 usage_str = "gl_FragDepth";
2614
2615 // !!! FIXME: can you actualy have a texture register with COLOR usage?
2616 else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
2617 {
2618 if (usage == MOJOSHADER_USAGE_TEXCOORD)
2619 {
2620 // ps_1_1 does a different hack for this attribute.
2621 // Refer to emit_GLSL_global()'s REG_TYPE_TEXTURE code.
2622 if (shader_version_atleast(ctx, 1, 4))
2623 {
2624 snprintf(index_str, sizeof (index_str), "%u", (uint) index);
2625 usage_str = "gl_TexCoord";
2626 arrayleft = "[";
2627 arrayright = "]";
2628 } // if
2629 } // if
2630
2631 else if (usage == MOJOSHADER_USAGE_COLOR)
2632 {
2633 index_str[0] = '\0'; // no explicit number.
2634 if (index == 0)
2635 usage_str = "gl_Color";
2636 else if (index == 1)
2637 usage_str = "gl_SecondaryColor";
2638 else
2639 fail(ctx, "unsupported color index");
2640 } // else if
2641 } // else if
2642
2643 else if (regtype == REG_TYPE_MISCTYPE)
2644 {
2645 const MiscTypeType mt = (MiscTypeType) regnum;
2646 if (mt == MISCTYPE_TYPE_FACE)
2647 {
2648 push_output(ctx, &ctx->globals);
2649 output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;", var);
2650 pop_output(ctx);
2651 } // if
2652 else if (mt == MISCTYPE_TYPE_POSITION)
2653 {
2654 index_str[0] = '\0'; // no explicit number.
2655 usage_str = "gl_FragCoord"; // !!! FIXME: is this the same coord space as D3D?
2656 } // else if
2657 else
2658 {
2659 fail(ctx, "BUG: unhandled misc register");
2660 } // else
2661 } // else if
2662
2663 else
2664 {
2665 fail(ctx, "unknown pixel shader attribute register");
2666 } // else
2667
2668 if (usage_str != NULL)
2669 {
2670 push_output(ctx, &ctx->globals);
2671 output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
2672 arrayleft, index_str, arrayright);
2673 pop_output(ctx);
2674 } // if
2675 } // else if
2676
2677 else
2678 {
2679 fail(ctx, "Unknown shader type"); // state machine should catch this.
2680 } // else
2681 } // emit_GLSL_attribute
2682
2683 static void emit_GLSL_NOP(Context *ctx)
2684 {
2685 // no-op is a no-op. :)
2686 } // emit_GLSL_NOP
2687
2688 static void emit_GLSL_MOV(Context *ctx)
2689 {
2690 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2691 char code[128];
2692 make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0);
2693 output_line(ctx, "%s", code);
2694 } // emit_GLSL_MOV
2695
2696 static void emit_GLSL_ADD(Context *ctx)
2697 {
2698 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2699 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2700 char code[128];
2701 make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1);
2702 output_line(ctx, "%s", code);
2703 } // emit_GLSL_ADD
2704
2705 static void emit_GLSL_SUB(Context *ctx)
2706 {
2707 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2708 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2709 char code[128];
2710 make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1);
2711 output_line(ctx, "%s", code);
2712 } // emit_GLSL_SUB
2713
2714 static void emit_GLSL_MAD(Context *ctx)
2715 {
2716 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2717 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2718 char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
2719 char code[128];
2720 make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2);
2721 output_line(ctx, "%s", code);
2722 } // emit_GLSL_MAD
2723
2724 static void emit_GLSL_MUL(Context *ctx)
2725 {
2726 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2727 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2728 char code[128];
2729 make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1);
2730 output_line(ctx, "%s", code);
2731 } // emit_GLSL_MUL
2732
2733 static void emit_GLSL_RCP(Context *ctx)
2734 {
2735 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2736 char code[128];
2737 make_GLSL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0);
2738 output_line(ctx, "%s", code);
2739 } // emit_GLSL_RCP
2740
2741 static void emit_GLSL_RSQ(Context *ctx)
2742 {
2743 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2744 char code[128];
2745 make_GLSL_destarg_assign(ctx, code, sizeof (code), "inversesqrt(%s)", src0);
2746 output_line(ctx, "%s", code);
2747 } // emit_GLSL_RSQ
2748
2749 static void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1,
2750 const char *extra)
2751 {
2752 const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
2753 char castleft[16] = { '\0' };
2754 const char *castright = "";
2755 if (vecsize != 1)
2756 {
2757 snprintf(castleft, sizeof (castleft), "vec%d(", vecsize);
2758 castright = ")";
2759 } // if
2760
2761 char code[128];
2762 make_GLSL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s",
2763 castleft, src0, src1, extra, castright);
2764 output_line(ctx, "%s", code);
2765 } // emit_GLSL_dotprod
2766
2767 static void emit_GLSL_DP3(Context *ctx)
2768 {
2769 char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2770 char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
2771 emit_GLSL_dotprod(ctx, src0, src1, "");
2772 } // emit_GLSL_DP3
2773
2774 static void emit_GLSL_DP4(Context *ctx)
2775 {
2776 char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2777 char src1[64]; make_GLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1));
2778 emit_GLSL_dotprod(ctx, src0, src1, "");
2779 } // emit_GLSL_DP4
2780
2781 static void emit_GLSL_MIN(Context *ctx)
2782 {
2783 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2784 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2785 char code[128];
2786 make_GLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1);
2787 output_line(ctx, "%s", code);
2788 } // emit_GLSL_MIN
2789
2790 static void emit_GLSL_MAX(Context *ctx)
2791 {
2792 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2793 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2794 char code[128];
2795 make_GLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1);
2796 output_line(ctx, "%s", code);
2797 } // emit_GLSL_MAX
2798
2799 static void emit_GLSL_SLT(Context *ctx)
2800 {
2801 const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
2802 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2803 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2804 char code[128];
2805
2806 // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants.
2807 if (vecsize == 1)
2808 make_GLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1);
2809 else
2810 {
2811 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2812 "vec%d(lessThan(%s, %s))",
2813 vecsize, src0, src1);
2814 } // else
2815 output_line(ctx, "%s", code);
2816 } // emit_GLSL_SLT
2817
2818 static void emit_GLSL_SGE(Context *ctx)
2819 {
2820 const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
2821 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2822 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2823 char code[128];
2824
2825 // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants.
2826 if (vecsize == 1)
2827 {
2828 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2829 "float(%s >= %s)", src0, src1);
2830 } // if
2831 else
2832 {
2833 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2834 "vec%d(greaterThanEqual(%s, %s))",
2835 vecsize, src0, src1);
2836 } // else
2837 output_line(ctx, "%s", code);
2838 } // emit_GLSL_SGE
2839
2840 static void emit_GLSL_EXP(Context *ctx)
2841 {
2842 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2843 char code[128];
2844 make_GLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0);
2845 output_line(ctx, "%s", code);
2846 } // emit_GLSL_EXP
2847
2848 static void emit_GLSL_LOG(Context *ctx)
2849 {
2850 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2851 char code[128];
2852 make_GLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0);
2853 output_line(ctx, "%s", code);
2854 } // emit_GLSL_LOG
2855
2856 static void emit_GLSL_LIT_helper(Context *ctx)
2857 {
2858 const char *maxp = "127.9961"; // value from the dx9 reference.
2859
2860 if (ctx->glsl_generated_lit_helper)
2861 return;
2862
2863 ctx->glsl_generated_lit_helper = 1;
2864
2865 push_output(ctx, &ctx->helpers);
2866 output_line(ctx, "vec4 LIT(const vec4 src)");
2867 output_line(ctx, "{"); ctx->indent++;
2868 output_line(ctx, "float power = clamp(src.w, -%s, %s);",maxp,maxp);
2869 output_line(ctx, "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);");
2870 output_line(ctx, "if (src.x > 0.0) {"); ctx->indent++;
2871 output_line(ctx, "retval.y = src.x;");
2872 output_line(ctx, "if (src.y > 0.0) {"); ctx->indent++;
2873 output_line(ctx, "retval.z = pow(src.y, power);"); ctx->indent--;
2874 output_line(ctx, "}"); ctx->indent--;
2875 output_line(ctx, "}");
2876 output_line(ctx, "return retval;"); ctx->indent--;
2877 output_line(ctx, "}");
2878 output_blank_line(ctx);
2879 pop_output(ctx);
2880 } // emit_GLSL_LIT_helper
2881
2882 static void emit_GLSL_LIT(Context *ctx)
2883 {
2884 char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2885 char code[128];
2886 emit_GLSL_LIT_helper(ctx);
2887 make_GLSL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0);
2888 output_line(ctx, "%s", code);
2889 } // emit_GLSL_LIT
2890
2891 static void emit_GLSL_DST(Context *ctx)
2892 {
2893 // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
2894 char src0_y[64]; make_GLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y));
2895 char src1_y[64]; make_GLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y));
2896 char src0_z[64]; make_GLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z));
2897 char src1_w[64]; make_GLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w));
2898
2899 char code[128];
2900 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2901 "vec4(1.0, %s * %s, %s, %s)",
2902 src0_y, src1_y, src0_z, src1_w);
2903 output_line(ctx, "%s", code);
2904 } // emit_GLSL_DST
2905
2906 static void emit_GLSL_LRP(Context *ctx)
2907 {
2908 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2909 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2910 char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
2911 char code[128];
2912 make_GLSL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)",
2913 src2, src1, src0);
2914 output_line(ctx, "%s", code);
2915 } // emit_GLSL_LRP
2916
2917 static void emit_GLSL_FRC(Context *ctx)
2918 {
2919 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2920 char code[128];
2921 make_GLSL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0);
2922 output_line(ctx, "%s", code);
2923 } // emit_GLSL_FRC
2924
2925 static void emit_GLSL_M4X4(Context *ctx)
2926 {
2927 char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2928 char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
2929 char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
2930 char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
2931 char row3[64]; make_GLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3));
2932 char code[256];
2933 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2934 "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
2935 src0, row0, src0, row1, src0, row2, src0, row3);
2936 output_line(ctx, "%s", code);
2937 } // emit_GLSL_M4X4
2938
2939 static void emit_GLSL_M4X3(Context *ctx)
2940 {
2941 char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2942 char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
2943 char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
2944 char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
2945 char code[256];
2946 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2947 "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
2948 src0, row0, src0, row1, src0, row2);
2949 output_line(ctx, "%s", code);
2950 } // emit_GLSL_M4X3
2951
2952 static void emit_GLSL_M3X4(Context *ctx)
2953 {
2954 char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2955 char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
2956 char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
2957 char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
2958 char row3[64]; make_GLSL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3));
2959
2960 char code[256];
2961 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2962 "vec4(dot(%s, %s), dot(%s, %s), "
2963 "dot(%s, %s), dot(%s, %s))",
2964 src0, row0, src0, row1,
2965 src0, row2, src0, row3);
2966 output_line(ctx, "%s", code);
2967 } // emit_GLSL_M3X4
2968
2969 static void emit_GLSL_M3X3(Context *ctx)
2970 {
2971 char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2972 char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
2973 char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
2974 char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
2975 char code[256];
2976 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2977 "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
2978 src0, row0, src0, row1, src0, row2);
2979 output_line(ctx, "%s", code);
2980 } // emit_GLSL_M3X3
2981
2982 static void emit_GLSL_M3X2(Context *ctx)
2983 {
2984 char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2985 char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
2986 char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
2987
2988 char code[256];
2989 make_GLSL_destarg_assign(ctx, code, sizeof (code),
2990 "vec2(dot(%s, %s), dot(%s, %s))",
2991 src0, row0, src0, row1);
2992 output_line(ctx, "%s", code);
2993 } // emit_GLSL_M3X2
2994
2995 static void emit_GLSL_CALL(Context *ctx)
2996 {
2997 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2998 if (ctx->loops > 0)
2999 output_line(ctx, "%s(aL);", src0);
3000 else
3001 output_line(ctx, "%s();", src0);
3002 } // emit_GLSL_CALL
3003
3004 static void emit_GLSL_CALLNZ(Context *ctx)
3005 {
3006 // !!! FIXME: if src1 is a constbool that's true, we can remove the
3007 // !!! FIXME: if. If it's false, we can make this a no-op.
3008 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3009 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
3010
3011 if (ctx->loops > 0)
3012 output_line(ctx, "if (%s) { %s(aL); }", src1, src0);
3013 else
3014 output_line(ctx, "if (%s) { %s(); }", src1, src0);
3015 } // emit_GLSL_CALLNZ
3016
3017 static void emit_GLSL_LOOP(Context *ctx)
3018 {
3019 // !!! FIXME: swizzle?
3020 char var[64]; get_GLSL_srcarg_varname(ctx, 1, var, sizeof (var));
3021 assert(ctx->source_args[0].regnum == 0); // in case they add aL1 someday.
3022 output_line(ctx, "{");
3023 ctx->indent++;
3024 output_line(ctx, "const int aLend = %s.x + %s.y;", var, var);
3025 output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var);
3026 ctx->indent++;
3027 } // emit_GLSL_LOOP
3028
3029 static void emit_GLSL_RET(Context *ctx)
3030 {
3031 // thankfully, the MSDN specs say a RET _has_ to end a function...no
3032 // early returns. So if you hit one, you know you can safely close
3033 // a high-level function.
3034 ctx->indent--;
3035 output_line(ctx, "}");
3036 output_blank_line(ctx);
3037 set_output(ctx, &ctx->subroutines);
3038 } // emit_GLSL_RET
3039
3040 static void emit_GLSL_ENDLOOP(Context *ctx)
3041 {
3042 ctx->indent--;
3043 output_line(ctx, "}");
3044 ctx->indent--;
3045 output_line(ctx, "}");
3046 } // emit_GLSL_ENDLOOP
3047
3048 static void emit_GLSL_LABEL(Context *ctx)
3049 {
3050 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3051 const int label = ctx->source_args[0].regnum;
3052 RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
3053 assert(ctx->output == ctx->subroutines); // not mainline, etc.
3054 assert(ctx->indent == 0); // we shouldn't be in the middle of a function.
3055
3056 // MSDN specs say CALL* has to come before the LABEL, so we know if we
3057 // can ditch the entire function here as unused.
3058 if (reg == NULL)
3059 set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output.
3060
3061 // !!! FIXME: it would be nice if we could determine if a function is
3062 // !!! FIXME: only called once and, if so, forcibly inline it.
3063
3064 const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
3065 output_line(ctx, "void %s(%s)", src0, uses_loopreg);
3066 output_line(ctx, "{");
3067 ctx->indent++;
3068 } // emit_GLSL_LABEL
3069
3070 static void emit_GLSL_DCL(Context *ctx)
3071 {
3072 // no-op. We do this in our emit_attribute() and emit_uniform().
3073 } // emit_GLSL_DCL
3074
3075 static void emit_GLSL_POW(Context *ctx)
3076 {
3077 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3078 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
3079 char code[128];
3080 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3081 "pow(abs(%s), %s)", src0, src1);
3082 output_line(ctx, "%s", code);
3083 } // emit_GLSL_POW
3084
3085 static void emit_GLSL_CRS(Context *ctx)
3086 {
3087 // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
3088 char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
3089 char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
3090 char code[128];
3091 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3092 "cross(%s, %s)", src0, src1);
3093 output_line(ctx, "%s", code);
3094 } // emit_GLSL_CRS
3095
3096 static void emit_GLSL_SGN(Context *ctx)
3097 {
3098 // (we don't need the temporary registers specified for the D3D opcode.)
3099 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3100 char code[128];
3101 make_GLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0);
3102 output_line(ctx, "%s", code);
3103 } // emit_GLSL_SGN
3104
3105 static void emit_GLSL_ABS(Context *ctx)
3106 {
3107 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3108 char code[128];
3109 make_GLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0);
3110 output_line(ctx, "%s", code);
3111 } // emit_GLSL_ABS
3112
3113 static void emit_GLSL_NRM(Context *ctx)
3114 {
3115 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3116 char code[128];
3117 make_GLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0);
3118 output_line(ctx, "%s", code);
3119 } // emit_GLSL_NRM
3120
3121 static void emit_GLSL_SINCOS(Context *ctx)
3122 {
3123 // we don't care about the temp registers that <= sm2 demands; ignore them.
3124 // sm2 also talks about what components are left untouched vs. undefined,
3125 // but we just leave those all untouched with GLSL write masks (which
3126 // would fulfill the "undefined" requirement, too).
3127 const int mask = ctx->dest_arg.writemask;
3128 char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3129 char code[128] = { '\0' };
3130
3131 if (writemask_x(mask))
3132 make_GLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0);
3133 else if (writemask_y(mask))
3134 make_GLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0);
3135 else if (writemask_xy(mask))
3136 {
3137 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3138 "vec2(cos(%s), sin(%s))", src0, src0);
3139 } // else if
3140
3141 output_line(ctx, "%s", code);
3142 } // emit_GLSL_SINCOS
3143
3144 static void emit_GLSL_REP(Context *ctx)
3145 {
3146 // !!! FIXME:
3147 // msdn docs say legal loop values are 0 to 255. We can check DEFI values
3148 // at parse time, but if they are pulling a value from a uniform, do
3149 // we clamp here?
3150 // !!! FIXME: swizzle is legal here, right?
3151 char src0[64]; make_GLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0));
3152 const uint rep = (uint) ctx->reps;
3153 output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {",
3154 rep, rep, src0, rep);
3155 ctx->indent++;
3156 } // emit_GLSL_REP
3157
3158 static void emit_GLSL_ENDREP(Context *ctx)
3159 {
3160 ctx->indent--;
3161 output_line(ctx, "}");
3162 } // emit_GLSL_ENDREP
3163
3164 static void emit_GLSL_IF(Context *ctx)
3165 {
3166 char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3167 output_line(ctx, "if (%s) {", src0);
3168 ctx->indent++;
3169 } // emit_GLSL_IF
3170
3171 static void emit_GLSL_IFC(Context *ctx)
3172 {
3173 const char *comp = get_GLSL_comparison_string_scalar(ctx);
3174 char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3175 char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
3176 output_line(ctx, "if (%s %s %s) {", src0, comp, src1);
3177 ctx->indent++;
3178 } // emit_GLSL_IFC
3179
3180 static void emit_GLSL_ELSE(Context *ctx)
3181 {
3182 ctx->indent--;
3183 output_line(ctx, "} else {");
3184 ctx->indent++;
3185 } // emit_GLSL_ELSE
3186
3187 static void emit_GLSL_ENDIF(Context *ctx)
3188 {
3189 ctx->indent--;
3190 output_line(ctx, "}");
3191 } // emit_GLSL_ENDIF
3192
3193 static void emit_GLSL_BREAK(Context *ctx)
3194 {
3195 output_line(ctx, "break;");
3196 } // emit_GLSL_BREAK
3197
3198 static void emit_GLSL_BREAKC(Context *ctx)
3199 {
3200 const char *comp = get_GLSL_comparison_string_scalar(ctx);
3201 char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3202 char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
3203 output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1);
3204 } // emit_GLSL_BREAKC
3205
3206 static void emit_GLSL_MOVA(Context *ctx)
3207 {
3208 const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
3209 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3210 char code[128];
3211
3212 if (vecsize == 1)
3213 {
3214 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3215 "int(floor(abs(%s) + 0.5) * sign(%s))",
3216 src0, src0);
3217 } // if
3218
3219 else
3220 {
3221 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3222 "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))",
3223 vecsize, src0, vecsize, src0);
3224 } // else
3225
3226 output_line(ctx, "%s", code);
3227 } // emit_GLSL_MOVA
3228
3229 static void emit_GLSL_DEFB(Context *ctx)
3230 {
3231 char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
3232 push_output(ctx, &ctx->globals);
3233 output_line(ctx, "const bool %s = %s;",
3234 varname, ctx->dwords[0] ? "true" : "false");
3235 pop_output(ctx);
3236 } // emit_GLSL_DEFB
3237
3238 static void emit_GLSL_DEFI(Context *ctx)
3239 {
3240 char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
3241 const int32 *x = (const int32 *) ctx->dwords;
3242 push_output(ctx, &ctx->globals);
3243 output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);",
3244 varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
3245 pop_output(ctx);
3246 } // emit_GLSL_DEFI
3247
// GLSL emitter for TEXCRD, generated by a macro defined outside this
//  section (presumably a stub that reports the opcode as unimplemented;
//  confirm against the macro definition).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
3249
3250 static void emit_GLSL_TEXKILL(Context *ctx)
3251 {
3252 char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3253 output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", dst);
3254 } // emit_GLSL_TEXKILL
3255
3256 static void glsl_texld(Context *ctx, const int texldd)
3257 {
3258 if (!shader_version_atleast(ctx, 1, 4))
3259 {
3260 DestArgInfo *info = &ctx->dest_arg;
3261 char dst[64];
3262 char sampler[64];
3263 char code[128] = {0};
3264
3265 assert(!texldd);
3266
3267 RegisterList *sreg;
3268 sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum);
3269 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3270
3271 // !!! FIXME: this code counts on the register not having swizzles, etc.
3272 get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3273 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3274 sampler, sizeof (sampler));
3275
3276 if (ttype == TEXTURE_TYPE_2D)
3277 {
3278 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3279 "texture2D(%s, %s.xy)",
3280 sampler, dst);
3281 }
3282 else if (ttype == TEXTURE_TYPE_CUBE)
3283 {
3284 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3285 "textureCube(%s, %s.xyz)",
3286 sampler, dst);
3287 }
3288 else if (ttype == TEXTURE_TYPE_VOLUME)
3289 {
3290 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3291 "texture3D(%s, %s.xyz)",
3292 sampler, dst);
3293 }
3294 else
3295 {
3296 fail(ctx, "unexpected texture type");
3297 } // else
3298 output_line(ctx, "%s", code);
3299 } // if
3300
3301 else if (!shader_version_atleast(ctx, 2, 0))
3302 {
3303 // ps_1_4 is different, too!
3304 fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME
3305 return;
3306 } // else if
3307
3308 else
3309 {
3310 const SourceArgInfo *samp_arg = &ctx->source_args[1];
3311 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3312 samp_arg->regnum);
3313 const char *funcname = NULL;
3314 char src0[64] = { '\0' };
3315 char src1[64]; get_GLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?
3316 char src2[64] = { '\0' };
3317 char src3[64] = { '\0' };
3318
3319 if (sreg == NULL)
3320 {
3321 fail(ctx, "TEXLD using undeclared sampler");
3322 return;
3323 } // if
3324
3325 if (texldd)
3326 {
3327 make_GLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2));
3328 make_GLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3));
3329 } // if
3330
3331 // !!! FIXME: can TEXLDD set instruction_controls?
3332 // !!! FIXME: does the d3d bias value map directly to GLSL?
3333 const char *biassep = "";
3334 char bias[64] = { '\0' };
3335 if (ctx->instruction_controls == CONTROL_TEXLDB)
3336 {
3337 biassep = ", ";
3338 make_GLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias));
3339 } // if
3340
3341 switch ((const TextureType) sreg->index)
3342 {
3343 case TEXTURE_TYPE_2D:
3344 if (ctx->instruction_controls == CONTROL_TEXLDP)
3345 {
3346 funcname = "texture2DProj";
3347 make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
3348 } // if
3349 else // texld/texldb
3350 {
3351 funcname = "texture2D";
3352 make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
3353 } // else
3354 break;
3355 case TEXTURE_TYPE_CUBE:
3356 if (ctx->instruction_controls == CONTROL_TEXLDP)
3357 fail(ctx, "TEXLDP on a cubemap"); // !!! FIXME: is this legal?
3358 funcname = "textureCube";
3359 make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
3360 break;
3361 case TEXTURE_TYPE_VOLUME:
3362 if (ctx->instruction_controls == CONTROL_TEXLDP)
3363 {
3364 funcname = "texture3DProj";
3365 make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
3366 } // if
3367 else // texld/texldb
3368 {
3369 funcname = "texture3D";
3370 make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
3371 } // else
3372 break;
3373 default:
3374 fail(ctx, "unknown texture type");
3375 return;
3376 } // switch
3377
3378 assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
3379 char swiz_str[6] = { '\0' };
3380 make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
3381 samp_arg->swizzle, ctx->dest_arg.writemask);
3382
3383 char code[128];
3384 if (texldd)
3385 {
3386 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3387 "%sGrad(%s, %s, %s, %s)%s", funcname,
3388 src1, src0, src2, src3, swiz_str);
3389 } // if
3390 else
3391 {
3392 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3393 "%s(%s, %s%s%s)%s", funcname,
3394 src1, src0, biassep, bias, swiz_str);
3395 } // else
3396
3397 output_line(ctx, "%s", code);
3398 } // else
3399 } // glsl_texld
3400
3401 static void emit_GLSL_TEXLD(Context *ctx)
3402 {
3403 glsl_texld(ctx, 0);
3404 } // emit_GLSL_TEXLD
3405
3406
3407 static void emit_GLSL_TEXBEM(Context *ctx)
3408 {
3409 DestArgInfo *info = &ctx->dest_arg;
3410 char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3411 char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
3412 char sampler[64];
3413 char code[512];
3414
3415 // !!! FIXME: this code counts on the register not having swizzles, etc.
3416 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3417 sampler, sizeof (sampler));
3418
3419 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3420 "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
3421 " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))",
3422 sampler,
3423 dst, sampler, src, sampler, src,
3424 dst, sampler, src, sampler, src);
3425
3426 output_line(ctx, "%s", code);
3427 } // emit_GLSL_TEXBEM
3428
3429
3430 static void emit_GLSL_TEXBEML(Context *ctx)
3431 {
3432 // !!! FIXME: this code counts on the register not having swizzles, etc.
3433 DestArgInfo *info = &ctx->dest_arg;
3434 char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3435 char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
3436 char sampler[64];
3437 char code[512];
3438
3439 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3440 sampler, sizeof (sampler));
3441
3442 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3443 "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
3444 " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *"
3445 " ((%s.z * %s_texbeml.x) + %s_texbem.y)",
3446 sampler,
3447 dst, sampler, src, sampler, src,
3448 dst, sampler, src, sampler, src,
3449 src, sampler, sampler);
3450
3451 output_line(ctx, "%s", code);
3452 } // emit_GLSL_TEXBEML
3453
// GLSL emitters for TEXREG2AR and TEXREG2GB, generated by a macro defined
//  outside this section (presumably stubs that report the opcode as
//  unimplemented; confirm against the macro definition).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
3456
3457
3458 static void emit_GLSL_TEXM3X2PAD(Context *ctx)
3459 {
3460 // no-op ... work happens in emit_GLSL_TEXM3X2TEX().
3461 } // emit_GLSL_TEXM3X2PAD
3462
3463 static void emit_GLSL_TEXM3X2TEX(Context *ctx)
3464 {
3465 if (ctx->texm3x2pad_src0 == -1)
3466 return;
3467
3468 DestArgInfo *info = &ctx->dest_arg;
3469 char dst[64];
3470 char src0[64];
3471 char src1[64];
3472 char src2[64];
3473 char sampler[64];
3474 char code[512];
3475
3476 // !!! FIXME: this code counts on the register not having swizzles, etc.
3477 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3478 sampler, sizeof (sampler));
3479 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
3480 src0, sizeof (src0));
3481 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
3482 src1, sizeof (src1));
3483 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3484 src2, sizeof (src2));
3485 get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3486
3487 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3488 "texture2D(%s, vec2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))",
3489 sampler, src0, src1, src2, dst);
3490
3491 output_line(ctx, "%s", code);
3492 } // emit_GLSL_TEXM3X2TEX
3493
3494 static void emit_GLSL_TEXM3X3PAD(Context *ctx)
3495 {
3496 // no-op ... work happens in emit_GLSL_TEXM3X3*().
3497 } // emit_GLSL_TEXM3X3PAD
3498
3499 static void emit_GLSL_TEXM3X3TEX(Context *ctx)
3500 {
3501 if (ctx->texm3x3pad_src1 == -1)
3502 return;
3503
3504 DestArgInfo *info = &ctx->dest_arg;
3505 char dst[64];
3506 char src0[64];
3507 char src1[64];
3508 char src2[64];
3509 char src3[64];
3510 char src4[64];
3511 char sampler[64];
3512 char code[512];
3513
3514 // !!! FIXME: this code counts on the register not having swizzles, etc.
3515 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3516 sampler, sizeof (sampler));
3517
3518 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3519 src0, sizeof (src0));
3520 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3521 src1, sizeof (src1));
3522 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3523 src2, sizeof (src2));
3524 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3525 src3, sizeof (src3));
3526 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3527 src4, sizeof (src4));
3528 get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3529
3530 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3531 info->regnum);
3532 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3533 const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
3534
3535 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3536 "texture%s(%s,"
3537 " vec3(dot(%s.xyz, %s.xyz),"
3538 " dot(%s.xyz, %s.xyz),"
3539 " dot(%s.xyz, %s.xyz)))",
3540 ttypestr, sampler, src0, src1, src2, src3, dst, src4);
3541
3542 output_line(ctx, "%s", code);
3543 } // emit_GLSL_TEXM3X3TEX
3544
3545 static void emit_GLSL_TEXM3X3SPEC_helper(Context *ctx)
3546 {
3547 if (ctx->glsl_generated_texm3x3spec_helper)
3548 return;
3549
3550 ctx->glsl_generated_texm3x3spec_helper = 1;
3551
3552 push_output(ctx, &ctx->helpers);
3553 output_line(ctx, "vec3 TEXM3X3SPEC_reflection(const vec3 normal, const vec3 eyeray)");
3554 output_line(ctx, "{"); ctx->indent++;
3555 output_line(ctx, "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--;
3556 output_line(ctx, "}");
3557 output_blank_line(ctx);
3558 pop_output(ctx);
3559 } // emit_GLSL_TEXM3X3SPEC_helper
3560
3561 static void emit_GLSL_TEXM3X3SPEC(Context *ctx)
3562 {
3563 if (ctx->texm3x3pad_src1 == -1)
3564 return;
3565
3566 DestArgInfo *info = &ctx->dest_arg;
3567 char dst[64];
3568 char src0[64];
3569 char src1[64];
3570 char src2[64];
3571 char src3[64];
3572 char src4[64];
3573 char src5[64];
3574 char sampler[64];
3575 char code[512];
3576
3577 emit_GLSL_TEXM3X3SPEC_helper(ctx);
3578
3579 // !!! FIXME: this code counts on the register not having swizzles, etc.
3580 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3581 sampler, sizeof (sampler));
3582
3583 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3584 src0, sizeof (src0));
3585 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3586 src1, sizeof (src1));
3587 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3588 src2, sizeof (src2));
3589 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3590 src3, sizeof (src3));
3591 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3592 src4, sizeof (src4));
3593 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
3594 src5, sizeof (src5));
3595 get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3596
3597 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3598 info->regnum);
3599 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3600 const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
3601
3602 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3603 "texture%s(%s, "
3604 "TEXM3X3SPEC_reflection("
3605 "vec3("
3606 "dot(%s.xyz, %s.xyz), "
3607 "dot(%s.xyz, %s.xyz), "
3608 "dot(%s.xyz, %s.xyz)"
3609 "),"
3610 "%s.xyz,"
3611 ")"
3612 ")",
3613 ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5);
3614
3615 output_line(ctx, "%s", code);
3616 } // emit_GLSL_TEXM3X3SPEC
3617
3618 static void emit_GLSL_TEXM3X3VSPEC(Context *ctx)
3619 {
3620 if (ctx->texm3x3pad_src1 == -1)
3621 return;
3622
3623 DestArgInfo *info = &ctx->dest_arg;
3624 char dst[64];
3625 char src0[64];
3626 char src1[64];
3627 char src2[64];
3628 char src3[64];
3629 char src4[64];
3630 char sampler[64];
3631 char code[512];
3632
3633 emit_GLSL_TEXM3X3SPEC_helper(ctx);
3634
3635 // !!! FIXME: this code counts on the register not having swizzles, etc.
3636 get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3637 sampler, sizeof (sampler));
3638
3639 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3640 src0, sizeof (src0));
3641 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3642 src1, sizeof (src1));
3643 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3644 src2, sizeof (src2));
3645 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3646 src3, sizeof (src3));
3647 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3648 src4, sizeof (src4));
3649 get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3650
3651 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3652 info->regnum);
3653 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3654 const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
3655
3656 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3657 "texture%s(%s, "
3658 "TEXM3X3SPEC_reflection("
3659 "vec3("
3660 "dot(%s.xyz, %s.xyz), "
3661 "dot(%s.xyz, %s.xyz), "
3662 "dot(%s.xyz, %s.xyz)"
3663 "), "
3664 "vec3(%s.w, %s.w, %s.w)"
3665 ")"
3666 ")",
3667 ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst);
3668
3669 output_line(ctx, "%s", code);
3670 } // emit_GLSL_TEXM3X3VSPEC
3671
3672 static void emit_GLSL_EXPP(Context *ctx)
3673 {
3674 // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
3675 emit_GLSL_EXP(ctx); // I guess this is just partial precision EXP?
3676 } // emit_GLSL_EXPP
3677
3678 static void emit_GLSL_LOGP(Context *ctx)
3679 {
3680 // LOGP is just low-precision LOG, but we'll take the higher precision.
3681 emit_GLSL_LOG(ctx);
3682 } // emit_GLSL_LOGP
3683
3684 // common code between CMP and CND.
3685 static void emit_GLSL_comparison_operations(Context *ctx, const char *cmp)
3686 {
3687 int i, j;
3688 DestArgInfo *dst = &ctx->dest_arg;
3689 const SourceArgInfo *srcarg0 = &ctx->source_args[0];
3690 const int origmask = dst->writemask;
3691 int used_swiz[4] = { 0, 0, 0, 0 };
3692 const int writemask[4] = { dst->writemask0, dst->writemask1,
3693 dst->writemask2, dst->writemask3 };
3694 const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
3695 srcarg0->swizzle_z, srcarg0->swizzle_w };
3696
3697 for (i = 0; i < 4; i++)
3698 {
3699 int mask = (1 << i);
3700
3701 if (!writemask[i]) continue;
3702 if (used_swiz[i]) continue;
3703
3704 // This is a swizzle we haven't checked yet.
3705 used_swiz[i] = 1;
3706
3707 // see if there are any other elements swizzled to match (.yyyy)
3708 for (j = i + 1; j < 4; j++)
3709 {
3710 if (!writemask[j]) continue;
3711 if (src0swiz[i] != src0swiz[j]) continue;
3712 mask |= (1 << j);
3713 used_swiz[j] = 1;
3714 } // for
3715
3716 // okay, (mask) should be the writemask of swizzles we like.
3717
3718 //return make_GLSL_srcarg_string(ctx, idx, (1 << 0));
3719
3720 char src0[64];
3721 char src1[64];
3722 char src2[64];
3723 make_GLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
3724 make_GLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
3725 make_GLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));
3726
3727 set_dstarg_writemask(dst, mask);
3728
3729 char code[128];
3730 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3731 "((%s %s) ? %s : %s)",
3732 src0, cmp, src1, src2);
3733 output_line(ctx, "%s", code);
3734 } // for
3735
3736 set_dstarg_writemask(dst, origmask);
3737 } // emit_GLSL_comparison_operations
3738
3739 static void emit_GLSL_CND(Context *ctx)
3740 {
3741 emit_GLSL_comparison_operations(ctx, "> 0.5");
3742 } // emit_GLSL_CND
3743
3744 static void emit_GLSL_DEF(Context *ctx)
3745 {
3746 const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
3747 char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
3748 char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
3749 char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
3750 char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
3751 char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
3752
3753 push_output(ctx, &ctx->globals);
3754 output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);",
3755 varname, val0, val1, val2, val3);
3756 pop_output(ctx);
3757 } // emit_GLSL_DEF
3758
// GLSL emitters for TEXREG2RGB, TEXDP3TEX, TEXM3X2DEPTH and TEXDP3,
//  generated by a macro defined outside this section (presumably stubs
//  that report the opcode as unimplemented; confirm against the macro
//  definition).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
3763
3764 static void emit_GLSL_TEXM3X3(Context *ctx)
3765 {
3766 if (ctx->texm3x3pad_src1 == -1)
3767 return;
3768
3769 char dst[64];
3770 char src0[64];
3771 char src1[64];
3772 char src2[64];
3773 char src3[64];
3774 char src4[64];
3775 char code[512];
3776
3777 // !!! FIXME: this code counts on the register not having swizzles, etc.
3778 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3779 src0, sizeof (src0));
3780 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3781 src1, sizeof (src1));
3782 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3783 src2, sizeof (src2));
3784 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3785 src3, sizeof (src3));
3786 get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3787 src4, sizeof (src4));
3788 get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3789
3790 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3791 "vec4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)",
3792 src0, src1, src2, src3, dst, src4);
3793
3794 output_line(ctx, "%s", code);
3795 } // emit_GLSL_TEXM3X3
3796
// GLSL emitter for TEXDEPTH, generated by a macro defined outside this
//  section (presumably a stub that reports the opcode as unimplemented;
//  confirm against the macro definition).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
3798
3799 static void emit_GLSL_CMP(Context *ctx)
3800 {
3801 emit_GLSL_comparison_operations(ctx, ">= 0.0");
3802 } // emit_GLSL_CMP
3803
// BEM has no GLSL translation yet; this macro invocation generates its
//  emit_GLSL_BEM function (macro body is defined outside this section).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
3805
3806 static void emit_GLSL_DP2ADD(Context *ctx)
3807 {
3808 char src0[64]; make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
3809 char src1[64]; make_GLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1));
3810 char src2[64]; make_GLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2));
3811 char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2);
3812 emit_GLSL_dotprod(ctx, src0, src1, extra);
3813 } // emit_GLSL_DP2ADD
3814
3815 static void emit_GLSL_DSX(Context *ctx)
3816 {
3817 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3818 char code[128];
3819 make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdx(%s)", src0);
3820 output_line(ctx, "%s", code);
3821 } // emit_GLSL_DSX
3822
3823 static void emit_GLSL_DSY(Context *ctx)
3824 {
3825 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3826 char code[128];
3827 make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdy(%s)", src0);
3828 output_line(ctx, "%s", code);
3829 } // emit_GLSL_DSY
3830
3831 static void emit_GLSL_TEXLDD(Context *ctx)
3832 {
3833 // !!! FIXME:
3834 // GLSL 1.30 introduced textureGrad() for this, but it looks like the
3835 // functions are overloaded instead of texture2DGrad() (etc).
3836
3837 // GL_shader_texture_lod and GL_EXT_gpu_shader4 added texture2DGrad*(),
3838 // so we'll use them if available. Failing that, we'll just fallback
3839 // to a regular texture2D call and hope the mipmap it chooses is close
3840 // enough.
3841 if (!ctx->glsl_generated_texldd_setup)
3842 {
3843 ctx->glsl_generated_texldd_setup = 1;
3844 push_output(ctx, &ctx->preflight);
3845 output_line(ctx, "#if GL_ARB_shader_texture_lod");
3846 output_line(ctx, "#extension GL_ARB_shader_texture_lod : enable");
3847 output_line(ctx, "#define texture2DGrad texture2DGradARB");
3848 output_line(ctx, "#define texture2DProjGrad texture2DProjARB");
3849 output_line(ctx, "#elif GL_EXT_gpu_shader4");
3850 output_line(ctx, "#extension GL_EXT_gpu_shader4 : enable");
3851 output_line(ctx, "#else");
3852 output_line(ctx, "#define texture2DGrad(a,b,c,d) texture2D(a,b)");
3853 output_line(ctx, "#define texture2DProjGrad(a,b,c,d) texture2DProj(a,b)");
3854 output_line(ctx, "#endif");
3855 output_blank_line(ctx);
3856 pop_output(ctx);
3857 } // if
3858
3859 glsl_texld(ctx, 1);
3860 } // emit_GLSL_TEXLDD
3861
3862 static void emit_GLSL_SETP(Context *ctx)
3863 {
3864 const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
3865 char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3866 char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
3867 char code[128];
3868
3869 // destination is always predicate register (which is type bvec4).
3870 if (vecsize == 1)
3871 {
3872 const char *comp = get_GLSL_comparison_string_scalar(ctx);
3873 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3874 "(%s %s %s)", src0, comp, src1);
3875 } // if
3876 else
3877 {
3878 const char *comp = get_GLSL_comparison_string_vector(ctx);
3879 make_GLSL_destarg_assign(ctx, code, sizeof (code),
3880 "%s(%s, %s)", comp, src0, src1);
3881 } // else
3882
3883 output_line(ctx, "%s", code);
3884 } // emit_GLSL_SETP
3885
3886 static void emit_GLSL_TEXLDL(Context *ctx)
3887 {
3888 // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
3889 // !!! FIXME: from fragment shaders for some inexplicable reason.
3890 // !!! FIXME: For now, you'll just have to suffer with the potentially
3891 // !!! FIXME: wrong mipmap until I can figure something out.
3892 emit_GLSL_TEXLD(ctx);
3893 } // emit_GLSL_TEXLDL
3894
3895 static void emit_GLSL_BREAKP(Context *ctx)
3896 {
3897 char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3898 output_line(ctx, "if (%s) { break; }", src0);
3899 } // emit_GLSL_BREAKP
3900
3901 static void emit_GLSL_RESERVED(Context *ctx)
3902 {
3903 // do nothing; fails in the state machine.
3904 } // emit_GLSL_RESERVED
3905
3906 #endif // SUPPORT_PROFILE_GLSL
3907
3908
3909
3910 #if !SUPPORT_PROFILE_ARB1
3911 #define PROFILE_EMITTER_ARB1(op)
3912 #else
3913 #undef AT_LEAST_ONE_PROFILE
3914 #define AT_LEAST_ONE_PROFILE 1
3915 #define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op,
3916
3917 static inline const char *get_ARB1_register_string(Context *ctx,
3918 const RegisterType regtype, const int regnum,
3919 char *regnum_str, const size_t regnum_size)
3920 {
3921 // turns out these are identical at the moment.
3922 return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
3923 } // get_ARB1_register_string
3924
3925 static const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
3926 const size_t buflen)
3927 {
3928 const int scratch = allocate_scratch_register(ctx);
3929 snprintf(buf, buflen, "scratch%d", scratch);
3930 return buf;
3931 } // allocate_ARB1_scratch_reg_name
3932
3933 static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
3934 char *buf, const size_t buflen)
3935 {
3936 snprintf(buf, buflen, "branch_label%d", id);
3937 return buf;
3938 } // get_ARB1_branch_label_name
3939
3940 static const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
3941 const int regnum, char *buf,
3942 const size_t buflen)
3943 {
3944 // turns out these are identical at the moment.
3945 return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
3946 } // get_ARB1_varname_in_buf
3947
3948 static const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
3949 const int regnum)
3950 {
3951 // turns out these are identical at the moment.
3952 return get_D3D_varname(ctx, rt, regnum);
3953 } // get_ARB1_varname
3954
3955
3956 static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
3957 const int base, const int size,
3958 char *buf, const size_t buflen)
3959 {
3960 snprintf(buf, buflen, "c_array_%d_%d", base, size);
3961 return buf;
3962 } // get_ARB1_const_array_varname_in_buf
3963
3964
3965 static const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
3966 {
3967 char buf[64];
3968 get_ARB1_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
3969 return StrDup(ctx, buf);
3970 } // get_ARB1_const_array_varname
3971
3972
3973 static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
3974 const SourceArgInfo *arg,
3975 char *buf, size_t buflen)
3976 {
3977 // !!! FIXME: this can hit pathological cases where we look like this...
3978 //
3979 // dp3 r1.xyz, t0_bx2, t0_bx2
3980 // mad r1.xyz, t0_bias, 1-r1, t0_bx2
3981 //
3982 // ...which do a lot of duplicate work in arb1...
3983 //
3984 // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
3985 // MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
3986 // SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
3987 // MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
3988 // DP3 r1.xyz, scratch0, scratch1;
3989 // SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
3990 // SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
3991 // SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
3992 // MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
3993 // MAD r1.xyz, scratch0, scratch1, scratch2;
3994 //
3995 // ...notice that the dp3 calculates the same value into two scratch
3996 // registers. This case is easier to handle; just see if multiple
3997 // source args are identical, build it up once, and use the same
3998 // scratch register for multiple arguments in that opcode.
3999 // Even better still, only calculate things once across instructions,
4000 // and be smart about letting it linger in a scratch register until we
4001 // definitely don't need the calculation anymore. That's harder to
4002 // write, though.
4003
4004 char regnum_str[16] = { '\0' };
4005
4006 // !!! FIXME: use get_ARB1_varname_in_buf() instead?
4007 const char *regtype_str = NULL;
4008 if (!arg->relative)
4009 {
4010 regtype_str = get_ARB1_register_string(ctx, arg->regtype,
4011 arg->regnum, regnum_str,
4012 sizeof (regnum_str));
4013 } // if
4014
4015 const char *rel_lbracket = "";
4016 char rel_offset[32] = { '\0' };
4017 const char *rel_rbracket = "";
4018 char rel_swizzle[4] = { '\0' };
4019 const char *rel_regtype_str = "";
4020 if (arg->relative)
4021 {
4022 rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype,
4023 arg->relative_regnum,
4024 (char *) alloca(64), 64);
4025
4026 rel_swizzle[0] = '.';
4027 rel_swizzle[1] = swizzle_channels[arg->relative_component];
4028 rel_swizzle[2] = '\0';
4029
4030 if (!support_nv2(ctx))
4031 {
4032 // The address register in ARB1 only allows the '.x' component, so
4033 // we need to load the component we need from a temp vector
4034 // register into .x as needed.
4035 assert(arg->relative_regtype == REG_TYPE_ADDRESS);
4036 assert(arg->relative_regnum == 0);
4037 if (ctx->last_address_reg_component != arg->relative_component)
4038 {
4039 output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
4040 arg->relative_regnum,
4041 swizzle_channels[arg->relative_component]);
4042 ctx->last_address_reg_component = arg->relative_component;
4043 } // if
4044
4045 rel_swizzle[1] = 'x';
4046 } // if
4047
4048 if (arg->regtype == REG_TYPE_INPUT)
4049 regtype_str = "vertex.attrib";
4050 else
4051 {
4052 assert(arg->regtype == REG_TYPE_CONST);
4053 const int arrayidx = arg->relative_array->index;
4054 const int arraysize = arg->relative_array->count;
4055 const int offset = arg->regnum - arrayidx;
4056 assert(offset >= 0);
4057 regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx,
4058 arraysize, (char *) alloca(64), 64);
4059 if (offset != 0)
4060 snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);
4061 } // else
4062
4063 rel_lbracket = "[";
4064 rel_rbracket = "]";
4065 } // if
4066
4067 // This is the source register with everything but swizzle and source mods.
4068 snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
4069 rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
4070 rel_rbracket);
4071
4072 // Some of the source mods need to generate instructions to a temp
4073 // register, in which case we'll replace the register name.
4074 const SourceMod mod = arg->src_mod;
4075 const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
4076 ((mod == SRCMOD_ABS) && support_nv2(ctx)) );
4077
4078 if (!inplace)
4079 {
4080 const size_t len = 64;
4081 char *stackbuf = (char *) alloca(len);
4082 regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
4083 regnum_str[0] = '\0'; // move value to scratch register.
4084 rel_lbracket = ""; // scratch register won't use array.
4085 rel_rbracket = "";
4086 rel_offset[0] = '\0';
4087 rel_swizzle[0] = '\0';
4088 rel_regtype_str = "";
4089 } // if
4090
4091 const char *premod_str = "";
4092 const char *postmod_str = "";
4093 switch (mod)
4094 {
4095 case SRCMOD_NEGATE:
4096 premod_str = "-";
4097 break;
4098
4099 case SRCMOD_BIASNEGATE:
4100 premod_str = "-";
4101 // fall through.
4102 case SRCMOD_BIAS:
4103 output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
4104 regtype_str, buf);
4105 break;
4106
4107 case SRCMOD_SIGNNEGATE:
4108 premod_str = "-";
4109 // fall through.
4110 case SRCMOD_SIGN:
4111 output_line(ctx,
4112 "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };",
4113 regtype_str, buf);
4114 break;
4115
4116 case SRCMOD_COMPLEMENT:
4117 output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
4118 regtype_str, buf);
4119 break;
4120
4121 case SRCMOD_X2NEGATE:
4122 premod_str = "-";
4123 // fall through.
4124 case SRCMOD_X2:
4125 output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
4126 regtype_str, buf);
4127 break;
4128
4129 case SRCMOD_DZ:
4130 fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
4131 postmod_str = "_dz";
4132 break;
4133
4134 case SRCMOD_DW:
4135 fail(ctx, "SRCMOD_DW currently unsupported in arb1");
4136 postmod_str = "_dw";
4137 break;
4138
4139 case SRCMOD_ABSNEGATE:
4140 premod_str = "-";
4141 // fall through.
4142 case SRCMOD_ABS:
4143 if (!support_nv2(ctx)) // GL_NV_vertex_program2_option adds this.
4144 output_line(ctx, "ABS %s, %s;", regtype_str, buf);
4145 else
4146 {
4147 premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
4148 postmod_str = "|";
4149 } // else
4150 break;
4151
4152 case SRCMOD_NOT:
4153 fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
4154 premod_str = "!";
4155 break;
4156
4157 case SRCMOD_NONE:
4158 case SRCMOD_TOTAL:
4159 break; // stop compiler whining.
4160 } // switch
4161
4162 char swizzle_str[6];
4163 size_t i = 0;
4164
4165 if (support_nv4(ctx)) // vFace must be output as "vFace.x" in nv4.
4166 {
4167 if (arg->regtype == REG_TYPE_MISCTYPE)
4168 {
4169 if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE )
4170 {
4171 swizzle_str[i++] = '.';
4172 swizzle_str[i++] = 'x';
4173 } // if
4174 } // if
4175 } // if
4176
4177 const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
4178 if (!scalar && !no_swizzle(arg->swizzle))
4179 {
4180 swizzle_str[i++] = '.';
4181
4182 // .xxxx is the same as .x, but .xx is illegal...scalar or full!
4183 if (replicate_swizzle(arg->swizzle))
4184 swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
4185 else
4186 {
4187 swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
4188 swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
4189 swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
4190 swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
4191 } // else
4192 } // if
4193 swizzle_str[i] = '\0';
4194 assert(i < sizeof (swizzle_str));
4195
4196 snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
4197 regtype_str, regnum_str, rel_lbracket,
4198 rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
4199 swizzle_str, postmod_str);
4200 // !!! FIXME: make sure the scratch buffer was large enough.
4201 return buf;
4202 } // make_ARB1_srcarg_string_in_buf
4203
4204 static const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
4205 const size_t buflen)
4206 {
4207 const DestArgInfo *arg = &ctx->dest_arg;
4208 return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
4209 } // get_ARB1_destarg_varname
4210
4211 static const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
4212 char *buf, const size_t buflen)
4213 {
4214 if (idx >= STATICARRAYLEN(ctx->source_args))
4215 {
4216 fail(ctx, "Too many source args");
4217 *buf = '\0';
4218 return buf;
4219 } // if
4220
4221 const SourceArgInfo *arg = &ctx->source_args[idx];
4222 return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
4223 } // get_ARB1_srcarg_varname
4224
4225
4226 static const char *make_ARB1_destarg_string(Context *ctx, char *buf,
4227 const size_t buflen)
4228 {
4229 const DestArgInfo *arg = &ctx->dest_arg;
4230
4231 *buf = '\0';
4232
4233 const char *sat_str = "";
4234 if (arg->result_mod & MOD_SATURATE)
4235 {
4236 // nv4 can use ".SAT" in all program types.
4237 // For less than nv4, the "_SAT" modifier is only available in
4238 // fragment shaders. Every thing else will fake it later in
4239 // emit_ARB1_dest_modifiers() ...
4240 if (support_nv4(ctx))
4241 sat_str = ".SAT";
4242 else if (shader_is_pixel(ctx))
4243 sat_str = "_SAT";
4244 } // if
4245
4246 const char *pp_str = "";
4247 if (arg->result_mod & MOD_PP)
4248 {
4249 // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
4250 // okay. The spec says lots of Direct3D implementations ignore the
4251 // flag anyhow.
4252 if (support_nv4(ctx))
4253 pp_str = "H";
4254 } // if
4255
4256 // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
4257 assert((arg->result_mod & MOD_CENTROID) == 0);
4258
4259 char regnum_str[16];
4260 const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
4261 arg->regnum, regnum_str,
4262 sizeof (regnum_str));
4263 if (regtype_str == NULL)
4264 {
4265 fail(ctx, "Unknown destination register type.");
4266 return buf;
4267 } // if
4268
4269 char writemask_str[6];
4270 size_t i = 0;
4271 const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
4272 if (!scalar && !writemask_xyzw(arg->writemask))
4273 {
4274 writemask_str[i++] = '.';
4275 if (arg->writemask0) writemask_str[i++] = 'x';
4276 if (arg->writemask1) writemask_str[i++] = 'y';
4277 if (arg->writemask2) writemask_str[i++] = 'z';
4278 if (arg->writemask3) writemask_str[i++] = 'w';
4279 } // if
4280 writemask_str[i] = '\0';
4281 assert(i < sizeof (writemask_str));
4282
4283 const char *pred_left = "";
4284 const char *pred_right = "";
4285 char pred[32] = { '\0' };
4286 if (ctx->predicated)
4287 {
4288 fail(ctx, "dest register predication currently unsupported in arb1");
4289 return buf;
4290 pred_left = "(";
4291 pred_right = ") ";
4292 make_ARB1_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
4293 pred, sizeof (pred));
4294 } // if
4295
4296 snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
4297 regtype_str, regnum_str, writemask_str);
4298 // !!! FIXME: make sure the scratch buffer was large enough.
4299 return buf;
4300 } // make_ARB1_destarg_string
4301
4302
4303 static void emit_ARB1_dest_modifiers(Context *ctx)
4304 {
4305 const DestArgInfo *arg = &ctx->dest_arg;
4306
4307 if (arg->result_shift != 0x0)
4308 {
4309 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4310 const char *multiplier = NULL;
4311
4312 switch (arg->result_shift)
4313 {
4314 case 0x1: multiplier = "2.0"; break;
4315 case 0x2: multiplier = "4.0"; break;
4316 case 0x3: multiplier = "8.0"; break;
4317 case 0xD: multiplier = "0.125"; break;
4318 case 0xE: multiplier = "0.25"; break;
4319 case 0xF: multiplier = "0.5"; break;
4320 } // switch
4321
4322 if (multiplier != NULL)
4323 {
4324 char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
4325 output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier);
4326 } // if
4327 } // if
4328
4329 if (arg->result_mod & MOD_SATURATE)
4330 {
4331 // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
4332 if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
4333 {
4334 char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
4335 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4336 output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
4337 output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
4338 } // if
4339 } // if
4340 } // emit_ARB1_dest_modifiers
4341
4342
4343 static const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
4344 char *buf, const size_t buflen)
4345 {
4346 if (idx >= STATICARRAYLEN(ctx->source_args))
4347 {
4348 fail(ctx, "Too many source args");
4349 *buf = '\0';
4350 return buf;
4351 } // if
4352
4353 const SourceArgInfo *arg = &ctx->source_args[idx];
4354 return make_ARB1_srcarg_string_in_buf(ctx, arg, buf, buflen);
4355 } // make_ARB1_srcarg_string
4356
4357 static void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
4358 {
4359 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4360 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4361 output_line(ctx, "%s%s, %s;", opcode, dst, src0);
4362 emit_ARB1_dest_modifiers(ctx);
4363 } // emit_ARB1_opcode_ds
4364
4365 static void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
4366 {
4367 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4368 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4369 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
4370 output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
4371 emit_ARB1_dest_modifiers(ctx);
4372 } // emit_ARB1_opcode_dss
4373
4374 static void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
4375 {
4376 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4377 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4378 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
4379 char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
4380 output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
4381 emit_ARB1_dest_modifiers(ctx);
4382 } // emit_ARB1_opcode_dsss
4383
4384
// Boilerplate generators for ARB1 opcode emitters. Each macro defines a
//  function named emit_ARB1_<op> that forwards to a shared helper, passing
//  the opcode name as a string (#op). The letters in the macro name select
//  the helper: D = destination argument, each S = one source argument.
// Only the helpers emit_ARB1_opcode_ds/_dss/_dsss are defined in this part
//  of the file. NOTE(review): the other helpers named below are not defined
//  here; a macro that refers to a missing helper only breaks the build if
//  the macro is actually used -- confirm which ones are.
#define EMIT_ARB1_OPCODE_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_D_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_d(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_S_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_s(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_ss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_ds(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dsss(ctx, #op); \
    }
#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        emit_ARB1_opcode_dssss(ctx, #op); \
    }
// Generates an emitter that just reports the opcode as unimplemented in the
//  current profile (named by ctx->profile->name).
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
    static void emit_ARB1_##op(Context *ctx) { \
        failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
    }
4421
4422
4423 static void emit_ARB1_start(Context *ctx, const char *profilestr)
4424 {
4425 const char *shader_str = NULL;
4426 const char *shader_full_str = NULL;
4427 if (shader_is_vertex(ctx))
4428 {
4429 shader_str = "vp";
4430 shader_full_str = "vertex";
4431 } // if
4432 else if (shader_is_pixel(ctx))
4433 {
4434 shader_str = "fp";
4435 shader_full_str = "fragment";
4436 } // else if
4437 else
4438 {
4439 failf(ctx, "Shader type %u unsupported in this profile.",
4440 (uint) ctx->shader_type);
4441 return;
4442 } // if
4443
4444 set_output(ctx, &ctx->preflight);
4445
4446 if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
4447 output_line(ctx, "!!ARB%s1.0", shader_str);
4448
4449 #if SUPPORT_PROFILE_ARB1_NV
4450 else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
4451 {
4452 ctx->profile_supports_nv2 = 1;
4453 output_line(ctx, "!!ARB%s1.0", shader_str);
4454 output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
4455 } // else if
4456
4457 else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
4458 {
4459 // there's no NV_fragment_program3, so just use 2.
4460 const int ver = shader_is_pixel(ctx) ? 2 : 3;
4461 ctx->profile_supports_nv2 = 1;
4462 ctx->profile_supports_nv3 = 1;
4463 output_line(ctx, "!!ARB%s1.0", shader_str);
4464 output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
4465 } // else if
4466
4467 else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
4468 {
4469 ctx->profile_supports_nv2 = 1;
4470 ctx->profile_supports_nv3 = 1;
4471 ctx->profile_supports_nv4 = 1;
4472 output_line(ctx, "!!NV%s4.0", shader_str);
4473 } // else if
4474 #endif
4475
4476 else
4477 {
4478 failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
4479 } // else
4480
4481 set_output(ctx, &ctx->mainline);
4482 } // emit_ARB1_start
4483
4484 static void emit_ARB1_end(Context *ctx)
4485 {
4486 // ps_1_* writes color to r0 instead oC0. We move it to the right place.
4487 // We don't have to worry about a RET opcode messing this up, since
4488 // RET isn't available before ps_2_0.
4489 if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
4490 {
4491 set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
4492 output_line(ctx, "MOV oC0, r0;");
4493 } // if
4494
4495 output_line(ctx, "END");
4496 } // emit_ARB1_end
4497
4498 static void emit_ARB1_phase(Context *ctx)
4499 {
4500 // no-op in arb1.
4501 } // emit_ARB1_phase
4502
4503 static inline const char *arb1_float_temp(const Context *ctx)
4504 {
4505 // nv4 lets you specify data type.
4506 return (support_nv4(ctx)) ? "FLOAT TEMP" : "TEMP";
4507 } // arb1_float_temp
4508
4509 static void emit_ARB1_finalize(Context *ctx)
4510 {
4511 push_output(ctx, &ctx->preflight);
4512
4513 if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
4514 output_line(ctx, "OPTION ARB_position_invariant;");
4515
4516 if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
4517 output_line(ctx, "OPTION ARB_draw_buffers;");
4518
4519 pop_output(ctx);
4520
4521 const char *tmpstr = arb1_float_temp(ctx);
4522 int i;
4523 push_output(ctx, &ctx->globals);
4524 for (i = 0; i < ctx->max_scratch_registers; i++)
4525 {
4526 char buf[64];
4527 allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
4528 output_line(ctx, "%s %s;", tmpstr, buf);
4529 } // for
4530
4531 // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
4532 if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
4533 {
4534 // set up temps for nv2 REP/ENDREP emulation through branching.
4535 for (i = 0; i < ctx->max_reps; i++)
4536 output_line(ctx, "TEMP rep%d;", i);
4537 } // if
4538
4539 pop_output(ctx);
4540 assert(ctx->scratch_registers == ctx->max_scratch_registers);
4541 } // emit_ARB1_finalize
4542
4543 static void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
4544 {
4545 // !!! FIXME: dependency on ARB1 profile. // !!! FIXME about FIXME: huh?
4546 char varname[64];
4547 get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
4548
4549 push_output(ctx, &ctx->globals);
4550 switch (regtype)
4551 {
4552 case REG_TYPE_ADDRESS:
4553 if (shader_is_pixel(ctx)) // actually REG_TYPE_TEXTURE.
4554 {
4555 // We have to map texture registers to temps for ps_1_1, since
4556 // they work like temps, initialize with tex coords, and the
4557 // ps_1_1 TEX opcode expects to overwrite it.
4558 if (!shader_version_atleast(ctx, 1, 4))
4559 {
4560 output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
4561 push_output(ctx, &ctx->mainline_intro);
4562 output_line(ctx, "MOV %s, fragment.texcoord[%d];",
4563 varname, regnum);
4564 pop_output(ctx);
4565 } // if
4566 break;
4567 } // if
4568
4569 // nv4 replaced address registers with generic int registers.
4570 if (support_nv4(ctx))
4571 output_line(ctx, "INT TEMP %s;", varname);
4572 else
4573 {
4574 // nv2 has four-component address already, but stock arb1 has
4575 // to emulate it in a temporary, and move components to the
4576 // scalar ADDRESS register on demand.
4577 output_line(ctx, "ADDRESS %s;", varname);
4578 if (!support_nv2(ctx))
4579 output_line(ctx, "TEMP addr%d;", regnum);
4580 } // else
4581 break;
4582
4583 //case REG_TYPE_PREDICATE:
4584 // output_line(ctx, "bvec4 %s;", varname);
4585 // break;
4586 case REG_TYPE_TEMP:
4587 output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
4588 break;
4589 //case REG_TYPE_LOOP:
4590 // break; // no-op. We declare these in for loops at the moment.
4591 //case REG_TYPE_LABEL:
4592 // break; // no-op. If we see it here, it means we optimized it out.
4593 default:
4594 fail(ctx, "BUG: we used a register we don't know how to define.");
4595 break;
4596 } // switch
4597 pop_output(ctx);
4598 } // emit_ARB1_global
4599
4600 static void emit_ARB1_array(Context *ctx, VariableList *var)
4601 {
4602 // All uniforms are now packed tightly into the program.local array,
4603 // instead of trying to map them to the d3d registers. So this needs to
4604 // map to the next piece of the array we haven't used yet. Thankfully,
4605 // arb1 lets you make a PARAM array that maps to a subset of another
4606 // array; we don't need to do offsets, since myarray[0] can map to
4607 // program.local[5] without any extra math from us.
4608 const int base = var->index;
4609 const int size = var->count;
4610 const int arb1base = ctx->uniform_float4_count +
4611 ctx->uniform_int4_count +
4612 ctx->uniform_bool_count;
4613 char varname[64];
4614 get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
4615 push_output(ctx, &ctx->globals);
4616 output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", varname,
4617 size, arb1base, (arb1base + size) - 1);
4618 pop_output(ctx);
4619 var->emit_position = arb1base;
4620 } // emit_ARB1_array
4621
4622 static void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
4623 int base, int size)
4624 {
4625 char varname[64];
4626 get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
4627 int i;
4628
4629 push_output(ctx, &ctx->globals);
4630 output_line(ctx, "PARAM %s[%d] = {", varname, size);
4631 ctx->indent++;
4632
4633 for (i = 0; i < size; i++)
4634 {
4635 while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
4636 clist = clist->next;
4637 assert(clist->constant.index == (base + i));
4638
4639 char val0[32];
4640 char val1[32];
4641 char val2[32];
4642 char val3[32];
4643 floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
4644 floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
4645 floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
4646 floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
4647
4648 output_line(ctx, "{ %s, %s, %s, %s }%s", val0, val1, val2, val3,
4649 (i < (size-1)) ? "," : "");
4650
4651 clist = clist->next;
4652 } // for
4653
4654 ctx->indent--;
4655 output_line(ctx, "};");
4656 pop_output(ctx);
4657 } // emit_ARB1_const_array
4658
4659 static void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
4660 const VariableList *var)
4661 {
4662 // We pack these down into the program.local array, so if we only use
4663 // register c439, it'll actually map to program.local[0]. This will
4664 // prevent overflows when we actually have enough resources to run.
4665
4666 const char *arrayname = "program.local";
4667 int index = 0;
4668
4669 char varname[64];
4670 get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
4671
4672 push_output(ctx, &ctx->globals);
4673
4674 if (var == NULL)
4675 {
4676 // all types share one array (rather, all types convert to float4).
4677 index = ctx->uniform_float4_count + ctx->uniform_int4_count +
4678 ctx->uniform_bool_count;
4679 } // if
4680
4681 else
4682 {
4683 const int arraybase = var->index;
4684 if (var->constant)
4685 {
4686 const int arraysize = var->count;
4687 arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase,
4688 arraysize, (char *) alloca(64), 64);
4689 index = (regnum - arraybase);
4690 } // if
4691 else
4692 {
4693 assert(var->emit_position != -1);
4694 index = (regnum - arraybase) + var->emit_position;
4695 } // else
4696 } // else
4697
4698 output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index);
4699 pop_output(ctx);
4700 } // emit_ARB1_uniform
4701
4702 static void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
4703 {
4704 // this is mostly a no-op...you don't predeclare samplers in arb1.
4705
4706 if (tb) // This sampler used a ps_1_1 TEXBEM opcode?
4707 {
4708 const int index = ctx->uniform_float4_count + ctx->uniform_int4_count +
4709 ctx->uniform_bool_count;
4710 char var[64];
4711 get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var));
4712 push_output(ctx, &ctx->globals);
4713 output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index);
4714 output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1);
4715 pop_output(ctx);
4716 ctx->uniform_float4_count += 2;
4717 } // if
4718 } // emit_ARB1_sampler
4719
4720 // !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
4721 static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
4722 MOJOSHADER_usage usage, int index, int wmask,
4723 int flags)
4724 {
4725 // !!! FIXME: this function doesn't deal with write masks at all yet!
4726 const char *usage_str = NULL;
4727 const char *arrayleft = "";
4728 const char *arrayright = "";
4729 char index_str[16] = { '\0' };
4730
4731 char varname[64];
4732 get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
4733
4734 //assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed?
4735
4736 if (index != 0) // !!! FIXME: a lot of these MUST be zero.
4737 snprintf(index_str, sizeof (index_str), "%u", (uint) index);
4738
4739 if (shader_is_vertex(ctx))
4740 {
4741 // pre-vs3 output registers.
4742 // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
4743 // output registers.
4744 if (!shader_version_atleast(ctx, 3, 0))
4745 {
4746 if (regtype == REG_TYPE_RASTOUT)
4747 {
4748 regtype = REG_TYPE_OUTPUT;
4749 index = regnum;
4750 switch ((const RastOutType) regnum)
4751 {
4752 case RASTOUT_TYPE_POSITION:
4753 usage = MOJOSHADER_USAGE_POSITION;
4754 break;
4755 case RASTOUT_TYPE_FOG:
4756 usage = MOJOSHADER_USAGE_FOG;
4757 break;
4758 case RASTOUT_TYPE_POINT_SIZE:
4759 usage = MOJOSHADER_USAGE_POINTSIZE;
4760 break;
4761 } // switch
4762 } // if
4763
4764 else if (regtype == REG_TYPE_ATTROUT)
4765 {
4766 regtype = REG_TYPE_OUTPUT;
4767 usage = MOJOSHADER_USAGE_COLOR;
4768 index = regnum;
4769 } // else if
4770
4771 else if (regtype == REG_TYPE_TEXCRDOUT)
4772 {
4773 regtype = REG_TYPE_OUTPUT;
4774 usage = MOJOSHADER_USAGE_TEXCOORD;
4775 index = regnum;
4776 } // else if
4777 } // if
4778
4779 // to avoid limitations of various GL entry points for input
4780 // attributes (glSecondaryColorPointer() can only take 3 component
4781 // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
4782 // issues), we set up all inputs as generic vertex attributes, so we
4783 // can pass data in just about any form, and ignore the built-in GLSL
4784 // attributes like gl_SecondaryColor. Output needs to use the the
4785 // built-ins, though, but we don't have to worry about the GL entry
4786 // point limitations there.
4787
4788 if (regtype == REG_TYPE_INPUT)
4789 {
4790 const int attr = ctx->assigned_vertex_attributes++;
4791 push_output(ctx, &ctx->globals);
4792 output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
4793 pop_output(ctx);
4794 } // if
4795
4796 else if (regtype == REG_TYPE_OUTPUT)
4797 {
4798 switch (usage)
4799 {
4800 case MOJOSHADER_USAGE_POSITION:
4801 ctx->arb1_wrote_position = 1;
4802 usage_str = "result.position";
4803 break;
4804 case MOJOSHADER_USAGE_POINTSIZE:
4805 usage_str = "result.pointsize";
4806 break;
4807 case MOJOSHADER_USAGE_COLOR:
4808 index_str[0] = '\0'; // no explicit number.
4809 if (index == 0)
4810 usage_str = "result.color.primary";
4811 else if (index == 1)
4812 usage_str = "result.color.secondary";
4813 break;
4814 case MOJOSHADER_USAGE_FOG:
4815 usage_str = "result.fogcoord";
4816 break;
4817 case MOJOSHADER_USAGE_TEXCOORD:
4818 snprintf(index_str, sizeof (index_str), "%u", (uint) index);
4819 usage_str = "result.texcoord";
4820 arrayleft = "[";
4821 arrayright = "]";
4822 break;
4823 default:
4824 // !!! FIXME: we need to deal with some more built-in varyings here.
4825 break;
4826 } // switch
4827
4828 // !!! FIXME: the #define is a little hacky, but it means we don't
4829 // !!! FIXME: have to track these separately if this works.
4830 push_output(ctx, &ctx->globals);
4831 // no mapping to built-in var? Just make it a regular global, pray.
4832 if (usage_str == NULL)
4833 output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
4834 else
4835 {
4836 output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
4837 arrayleft, index_str, arrayright);
4838 } // else
4839 pop_output(ctx);
4840 } // else if
4841
4842 else
4843 {
4844 fail(ctx, "unknown vertex shader attribute register");
4845 } // else
4846 } // if
4847
4848 else if (shader_is_pixel(ctx))
4849 {
4850 const char *paramtype_str = "ATTRIB";
4851
4852 // samplers DCLs get handled in emit_ARB1_sampler().
4853
4854 if (flags & MOD_CENTROID)
4855 {
4856 if (!support_nv4(ctx)) // GL_NV_fragment_program4 adds centroid.
4857 {
4858 // !!! FIXME: should we just wing it without centroid here?
4859 failf(ctx, "centroid unsupported in %s profile",
4860 ctx->profile->name);
4861 return;
4862 } // if
4863
4864 paramtype_str = "CENTROID ATTRIB";
4865 } // if
4866
4867 if (regtype == REG_TYPE_COLOROUT)
4868 {
4869 paramtype_str = "OUTPUT";
4870 usage_str = "result.color";
4871 if (ctx->have_multi_color_outputs)
4872 {
4873 // We have to gamble that you have GL_ARB_draw_buffers.
4874 // You probably do at this point if you have a sane setup.
4875 snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
4876 arrayleft = "[";
4877 arrayright = "]";
4878 } // if
4879 } // if
4880
4881 else if (regtype == REG_TYPE_DEPTHOUT)
4882 {
4883 paramtype_str = "OUTPUT";
4884 usage_str = "result.depth";
4885 } // else if
4886
4887 // !!! FIXME: can you actualy have a texture register with COLOR usage?
4888 else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
4889 {
4890 if (usage == MOJOSHADER_USAGE_TEXCOORD)
4891 {
4892 // ps_1_1 does a different hack for this attribute.
4893 // Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code.
4894 if (shader_version_atleast(ctx, 1, 4))
4895 {
4896 snprintf(index_str, sizeof (index_str), "%u", (uint) index);
4897 usage_str = "fragment.texcoord";
4898 arrayleft = "[";
4899 arrayright = "]";
4900 } // if
4901 } // if
4902
4903 else if (usage == MOJOSHADER_USAGE_COLOR)
4904 {
4905 index_str[0] = '\0'; // no explicit number.
4906 if (index == 0)
4907 usage_str = "fragment.color.primary";
4908 else if (index == 1)
4909 usage_str = "fragment.color.secondary";
4910 else
4911 fail(ctx, "unsupported color index");
4912 } // else if
4913 } // else if
4914
4915 else if (regtype == REG_TYPE_MISCTYPE)
4916 {
4917 const MiscTypeType mt = (MiscTypeType) regnum;
4918 if (mt == MISCTYPE_TYPE_FACE)
4919 {
4920 if (support_nv4(ctx)) // FINALLY, a vFace equivalent in nv4!
4921 {
4922 index_str[0] = '\0'; // no explicit number.
4923 usage_str = "fragment.facing";
4924 } // if
4925 else
4926 {
4927 failf(ctx, "vFace unsupported in %s profile",
4928 ctx->profile->name);
4929 } // else
4930 } // if
4931 else if (mt == MISCTYPE_TYPE_POSITION)
4932 {
4933 index_str[0] = '\0'; // no explicit number.
4934 usage_str = "fragment.position"; // !!! FIXME: is this the same coord space as D3D?
4935 } // else if
4936 else
4937 {
4938 fail(ctx, "BUG: unhandled misc register");
4939 } // else
4940 } // else if
4941
4942 else
4943 {
4944 fail(ctx, "unknown pixel shader attribute register");
4945 } // else
4946
4947 if (usage_str != NULL)
4948 {
4949 push_output(ctx, &ctx->globals);
4950 output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
4951 usage_str, arrayleft, index_str, arrayright);
4952 pop_output(ctx);
4953 } // if
4954 } // else if
4955
4956 else
4957 {
4958 fail(ctx, "Unknown shader type"); // state machine should catch this.
4959 } // else
4960 } // emit_ARB1_attribute
4961
4962 static void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ }
4963
4964 static void emit_ARB1_NOP(Context *ctx)
4965 {
4966 // There is no NOP in arb1. Just don't output anything here.
4967 } // emit_ARB1_NOP
4968
// Emitters generated by macros that are defined outside this part of the file.
// NOTE(review): presumably each line defines emit_ARB1_<OPCODE>() as a direct
// passthrough to the same-named arb1 opcode, with the DS/DSS/DSSS suffix
// giving the operand shape (one dest plus one/two/three sources) -- confirm
// against the macro definitions.
EMIT_ARB1_OPCODE_DS_FUNC(MOV)
EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
EMIT_ARB1_OPCODE_DS_FUNC(RCP)
4975
4976 static void emit_ARB1_RSQ(Context *ctx)
4977 {
4978 // nv4 doesn't force abs() on this, so negative values will generate NaN.
4979 // The spec says you should force the abs() yourself.
4980 if (!support_nv4(ctx))
4981 {
4982 emit_ARB1_opcode_ds(ctx, "RSQ"); // pre-nv4 implies ABS.
4983 return;
4984 } // if
4985
4986 // we can optimize this to use nv2's |abs| construct in some cases.
4987 if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
4988 (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
4989 (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
4990 ctx->source_args[0].src_mod = SRCMOD_ABS;
4991
4992 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4993 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4994
4995 if (ctx->source_args[0].src_mod == SRCMOD_ABS)
4996 output_line(ctx, "RSQ%s, %s;", dst, src0);
4997 else
4998 {
4999 char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5000 output_line(ctx, "ABS %s, %s;", buf, src0);
5001 output_line(ctx, "RSQ%s, %s.x;", dst, buf);
5002 } // else
5003
5004 emit_ARB1_dest_modifiers(ctx);
5005 } // emit_ARB1_RSQ
5006
// Macro-generated emitters (the macro is defined outside this part of the
// file). NOTE(review): presumably each defines emit_ARB1_<OPCODE>() as a
// passthrough to the same-named arb1 opcode taking one dest and two
// sources -- confirm against the macro definition.
EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
5013
5014 static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
5015
5016 static void arb1_log(Context *ctx, const char *opcode)
5017 {
5018 // !!! FIXME: SRCMOD_NEGATE can be made into SRCMOD_ABS here, too
5019 // we can optimize this to use nv2's |abs| construct in some cases.
5020 if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
5021 (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
5022 ctx->source_args[0].src_mod = SRCMOD_ABS;
5023
5024 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5025 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5026
5027 if (ctx->source_args[0].src_mod == SRCMOD_ABS)
5028 output_line(ctx, "%s%s, %s;", opcode, dst, src0);
5029 else
5030 {
5031 char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5032 output_line(ctx, "ABS %s, %s;", buf, src0);
5033 output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
5034 } // else
5035
5036 emit_ARB1_dest_modifiers(ctx);
5037 } // arb1_log
5038
5039
5040 static void emit_ARB1_LOG(Context *ctx)
5041 {
5042 arb1_log(ctx, "LG2");
5043 } // emit_ARB1_LOG
5044
5045
// Macro-generated emitters for LIT and DST (macros defined outside this part
// of the file). NOTE(review): presumably passthroughs to the same-named arb1
// opcodes -- confirm against the macro definitions.
EMIT_ARB1_OPCODE_DS_FUNC(LIT)
EMIT_ARB1_OPCODE_DSS_FUNC(DST)
5048
5049 static void emit_ARB1_LRP(Context *ctx)
5050 {
5051 if (shader_is_pixel(ctx)) // fragment shaders have a matching LRP opcode.
5052 emit_ARB1_opcode_dsss(ctx, "LRP");
5053 else
5054 {
5055 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5056 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5057 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5058 char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5059 char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5060
5061 // LRP is: dest = src2 + src0 * (src1 - src2)
5062 output_line(ctx, "SUB %s, %s, %s;", buf, src1, src2);
5063 output_line(ctx, "MAD%s, %s, %s, %s;", dst, buf, src0, src2);
5064 emit_ARB1_dest_modifiers(ctx);
5065 } // else
5066 } // emit_ARB1_LRP
5067
// Macro-generated emitter for FRC (macro defined outside this part of the
// file). NOTE(review): presumably a passthrough to arb1's FRC -- confirm.
EMIT_ARB1_OPCODE_DS_FUNC(FRC)
5069
5070 static void arb1_MxXy(Context *ctx, const int x, const int y)
5071 {
5072 DestArgInfo *dstarg = &ctx->dest_arg;
5073 const int origmask = dstarg->writemask;
5074 char src0[64];
5075 int i;
5076
5077 make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5078
5079 for (i = 0; i < y; i++)
5080 {
5081 char dst[64];
5082 char row[64];
5083 make_ARB1_srcarg_string(ctx, i + 1, row, sizeof (row));
5084 set_dstarg_writemask(dstarg, 1 << i);
5085 make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5086 output_line(ctx, "DP%d%s, %s, %s;", x, dst, src0, row);
5087 } // for
5088
5089 set_dstarg_writemask(dstarg, origmask);
5090 emit_ARB1_dest_modifiers(ctx);
5091 } // arb1_MxXy
5092
5093 static void emit_ARB1_M4X4(Context *ctx) { arb1_MxXy(ctx, 4, 4); }
5094 static void emit_ARB1_M4X3(Context *ctx) { arb1_MxXy(ctx, 4, 3); }
5095 static void emit_ARB1_M3X4(Context *ctx) { arb1_MxXy(ctx, 3, 4); }
5096 static void emit_ARB1_M3X3(Context *ctx) { arb1_MxXy(ctx, 3, 3); }
5097 static void emit_ARB1_M3X2(Context *ctx) { arb1_MxXy(ctx, 3, 2); }
5098
5099 static void emit_ARB1_CALL(Context *ctx)
5100 {
5101 if (!support_nv2(ctx)) // no branching in stock ARB1.
5102 {
5103 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5104 return;
5105 } // if
5106
5107 char labelstr[64];
5108 get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
5109 output_line(ctx, "CAL %s;", labelstr);
5110 } // emit_ARB1_CALL
5111
5112 static void emit_ARB1_CALLNZ(Context *ctx)
5113 {
5114 // !!! FIXME: if src1 is a constbool that's true, we can remove the
5115 // !!! FIXME: if. If it's false, we can make this a no-op.
5116
5117 if (!support_nv2(ctx)) // no branching in stock ARB1.
5118 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5119 else
5120 {
5121 // !!! FIXME: double-check this.
5122 char labelstr[64];
5123 char scratch[64];
5124 char src1[64];
5125 get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
5126 get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
5127 allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5128 output_line(ctx, "MOVC %s, %s;", scratch, src1);
5129 output_line(ctx, "CAL %s (NE.x);", labelstr);
5130 } // else
5131 } // emit_ARB1_CALLNZ
5132
// LOOP has no arb1 implementation yet; the emitter is generated by a macro
// defined outside this part of the file.
// NOTE(review): presumably the generated emit_ARB1_LOOP() reports the opcode
// as unimplemented -- confirm against the macro definition.
// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
5135
5136 static void emit_ARB1_RET(Context *ctx)
5137 {
5138 // don't fail() if no nv2...maybe we're just ending the mainline?
5139 // if we're ending a LABEL that had no CALL, this would all be written
5140 // to ctx->ignore anyhow, so this should be "safe" ... arb1 profile will
5141 // just end up throwing all this code out.
5142 if (support_nv2(ctx)) // no branching in stock ARB1.
5143 output_line(ctx, "RET;");
5144 set_output(ctx, &ctx->mainline); // in case we were ignoring this function.
5145 } // emit_ARB1_RET
5146
5147
// ENDLOOP has no arb1 implementation yet; the emitter is generated by a macro
// defined outside this part of the file.
// NOTE(review): presumably reports the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
5149
5150 static void emit_ARB1_LABEL(Context *ctx)
5151 {
5152 if (!support_nv2(ctx)) // no branching in stock ARB1.
5153 return; // don't fail()...maybe we never use it, but do fail in CALL.
5154
5155 const int label = ctx->source_args[0].regnum;
5156 RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
5157
5158 // MSDN specs say CALL* has to come before the LABEL, so we know if we
5159 // can ditch the entire function here as unused.
5160 if (reg == NULL)
5161 set_output(ctx, &ctx->ignore); // Func not used. Parse, but don't output.
5162
5163 // !!! FIXME: it would be nice if we could determine if a function is
5164 // !!! FIXME: only called once and, if so, forcibly inline it.
5165
5166 //const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
5167 char labelstr[64];
5168 get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
5169 output_line(ctx, "%s:", labelstr);
5170 } // emit_ARB1_LABEL
5171
5172
5173 static void emit_ARB1_POW(Context *ctx)
5174 {
5175 // we can optimize this to use nv2's |abs| construct in some cases.
5176 if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
5177 (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
5178 ctx->source_args[0].src_mod = SRCMOD_ABS;
5179
5180 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5181 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5182 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5183
5184 if (ctx->source_args[0].src_mod == SRCMOD_ABS)
5185 output_line(ctx, "POW%s, %s, %s;", dst, src0, src1);
5186 else
5187 {
5188 char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5189 output_line(ctx, "ABS %s, %s;", buf, src0);
5190 output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1);
5191 } // else
5192
5193 emit_ARB1_dest_modifiers(ctx);
5194 } // emit_ARB1_POW
5195
5196 static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); }
5197
5198 static void emit_ARB1_SGN(Context *ctx)
5199 {
5200 if (support_nv2(ctx))
5201 emit_ARB1_opcode_ds(ctx, "SSG");
5202 else
5203 {
5204 char dst[64];
5205 char src0[64];
5206 char scratch1[64];
5207 char scratch2[64];
5208 make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5209 make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5210 allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1));
5211 allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2));
5212 output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);
5213 output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0);
5214 output_line(ctx, "ADD%s -%s, %s;", dst, scratch1, scratch2);
5215 emit_ARB1_dest_modifiers(ctx);
5216 } // else
5217 } // emit_ARB1_SGN
5218
// Macro-generated emitter for ABS (macro defined outside this part of the
// file). NOTE(review): presumably a passthrough to arb1's ABS -- confirm.
EMIT_ARB1_OPCODE_DS_FUNC(ABS)
5220
5221 static void emit_ARB1_NRM(Context *ctx)
5222 {
5223 // nv2 fragment programs (and anything nv4) have a real NRM.
5224 if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5225 emit_ARB1_opcode_ds(ctx, "NRM");
5226 else
5227 {
5228 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5229 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5230 char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5231 output_line(ctx, "DP3 %s.w, %s, %s;", buf, src0, src0);
5232 output_line(ctx, "RSQ %s.w, %s.w;", buf, buf);
5233 output_line(ctx, "MUL%s, %s.w, %s;", dst, buf, src0);
5234 emit_ARB1_dest_modifiers(ctx);
5235 } // else
5236 } // emit_ARB1_NRM
5237
5238
5239 static void emit_ARB1_SINCOS(Context *ctx)
5240 {
5241 // we don't care about the temp registers that <= sm2 demands; ignore them.
5242 const int mask = ctx->dest_arg.writemask;
5243
5244 // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
5245 if ((shader_is_pixel(ctx)) || (support_nv4(ctx)))
5246 {
5247 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5248 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5249 if (writemask_x(mask))
5250 output_line(ctx, "COS%s, %s;", dst, src0);
5251 else if (writemask_y(mask))
5252 output_line(ctx, "SIN%s, %s;", dst, src0);
5253 else if (writemask_xy(mask))
5254 output_line(ctx, "SCS%s, %s;", dst, src0);
5255 } // if
5256
5257 // nv2+ profiles have sin and cos opcodes.
5258 else if (support_nv2(ctx))
5259 {
5260 char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5261 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5262 if (writemask_x(mask))
5263 output_line(ctx, "COS %s.x, %s;", dst, src0);
5264 else if (writemask_y(mask))
5265 output_line(ctx, "SIN %s.y, %s;", dst, src0);
5266 else if (writemask_xy(mask))
5267 {
5268 output_line(ctx, "SIN %s.x, %s;", dst, src0);
5269 output_line(ctx, "COS %s.y, %s;", dst, src0);
5270 } // else if
5271 } // if
5272
5273 else // big nasty.
5274 {
5275 char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5276 char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
5277 const int need_sin = (writemask_x(mask) || writemask_xy(mask));
5278 const int need_cos = (writemask_y(mask) || writemask_xy(mask));
5279 char scratch[64];
5280
5281 if (need_sin || need_cos)
5282 allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5283
5284 // These sin() and cos() approximations originally found here:
5285 // http://www.devmaster.net/forums/showthread.php?t=5784
5286 //
5287 // const float B = 4.0f / M_PI;
5288 // const float C = -4.0f / (M_PI * M_PI);
5289 // float y = B * x + C * x * fabs(x);
5290 //
5291 // // optional better precision...
5292 // const float P = 0.225f;
5293 // y = P * (y * fabs(y) - y) + y;
5294 //
5295 //
5296 // That first thing can be reduced to:
5297 // const float y = ((1.2732395447351626861510701069801f * x) +
5298 // ((-0.40528473456935108577551785283891f * x) * fabs(x)));
5299
5300 if (need_sin)
5301 {
5302 // !!! FIXME: use SRCMOD_ABS here?
5303 output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
5304 output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
5305 output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
5306 output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
5307 } // if
5308
5309 // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
5310 // if (x+(M_PI/2) > M_PI)
5311 // x -= 2 * M_PI;
5312 //
5313 // which is...
5314 // if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
5315 // x += -6.283185482025146484375;
5316
5317 if (need_cos)
5318 {
5319 output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
5320 output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
5321 output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
5322 output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
5323 output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
5324 output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
5325 output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
5326 } // if
5327 } // else
5328
5329 // !!! FIXME: might not have done anything. Don't emit if we didn't.
5330 if (!isfail(ctx))
5331 emit_ARB1_dest_modifiers(ctx);
5332 } // emit_ARB1_SINCOS
5333
5334
5335 static void emit_ARB1_REP(Context *ctx)
5336 {
5337 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5338
5339 // nv2 fragment programs (and everything nv4) have a real REP.
5340 if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5341 output_line(ctx, "REP %s;", src0);
5342
5343 else if (support_nv2(ctx))
5344 {
5345 // no REP, but we can use branches.
5346 char failbranch[32];
5347 char topbranch[32];
5348 const int toplabel = allocate_branch_label(ctx);
5349 const int faillabel = allocate_branch_label(ctx);
5350 get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
5351 get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
5352
5353 assert(((size_t) ctx->branch_labels_stack_index) <
5354 STATICARRAYLEN(ctx->branch_labels_stack)-1);
5355
5356 ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel;
5357 ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = faillabel;
5358
5359 char scratch[32];
5360 snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
5361 output_line(ctx, "MOVC %s.x, %s;", scratch, src0);
5362 output_line(ctx, "BRA %s (LE.x);", failbranch);
5363 output_line(ctx, "%s:", topbranch);
5364 } // else if
5365
5366 else // stock ARB1 has no branching.
5367 {
5368 fail(ctx, "branching unsupported in this profile");
5369 } // else
5370 } // emit_ARB1_REP
5371
5372
5373 static void emit_ARB1_ENDREP(Context *ctx)
5374 {
5375 // nv2 fragment programs (and everything nv4) have a real ENDREP.
5376 if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5377 output_line(ctx, "ENDREP;");
5378
5379 else if (support_nv2(ctx))
5380 {
5381 // no ENDREP, but we can use branches.
5382 assert(ctx->branch_labels_stack_index >= 2);
5383
5384 char failbranch[32];
5385 char topbranch[32];
5386 const int faillabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
5387 const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
5388 get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
5389 get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
5390
5391 char scratch[32];
5392 snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
5393 output_line(ctx, "SUBC %s.x, %s.x, 1.0;", scratch, scratch);
5394 output_line(ctx, "BRA %s (GT.x);", topbranch);
5395 output_line(ctx, "%s:", failbranch);
5396 } // else if
5397
5398 else // stock ARB1 has no branching.
5399 {
5400 fail(ctx, "branching unsupported in this profile");
5401 } // else
5402 } // emit_ARB1_ENDREP
5403
5404
5405 static void nv2_if(Context *ctx)
5406 {
5407 // The condition code register MUST be set up before this!
5408 // nv2 fragment programs (and everything nv4) have a real IF.
5409 if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) )
5410 output_line(ctx, "IF EQ.x;");
5411 else
5412 {
5413 // there's no IF construct, but we can use a branch to a label.
5414 char failbranch[32];
5415 const int label = allocate_branch_label(ctx);
5416 get_ARB1_branch_label_name(ctx, label, failbranch, sizeof (failbranch));
5417
5418 assert(((size_t) ctx->branch_labels_stack_index)
5419 < STATICARRAYLEN(ctx->branch_labels_stack));
5420
5421 ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = label;
5422
5423 // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
5424 output_line(ctx, "BRA %s (EQ.x);", failbranch);
5425 } // else
5426 } // nv2_if
5427
5428
5429 static void emit_ARB1_IF(Context *ctx)
5430 {
5431 if (support_nv2(ctx))
5432 {
5433 char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5434 char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
5435 output_line(ctx, "MOVC %s.x, %s;", buf, src0);
5436 nv2_if(ctx);
5437 } // if
5438
5439 else // stock ARB1 has no branching.
5440 {
5441 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5442 } // else
5443 } // emit_ARB1_IF
5444
5445
5446 static void emit_ARB1_ELSE(Context *ctx)
5447 {
5448 // nv2 fragment programs (and everything nv4) have a real ELSE.
5449 if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5450 output_line(ctx, "ELSE;");
5451
5452 else if (support_nv2(ctx))
5453 {
5454 // there's no ELSE construct, but we can use a branch to a label.
5455 assert(ctx->branch_labels_stack_index > 0);
5456
5457 // At the end of the IF block, unconditionally jump to the ENDIF.
5458 const int endlabel = allocate_branch_label(ctx);
5459 char endbranch[32];
5460 get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
5461 output_line(ctx, "BRA %s;", endbranch);
5462
5463 // Now mark the ELSE section with a lable.
5464 const int elselabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
5465 char elsebranch[32];
5466 get_ARB1_branch_label_name(ctx,elselabel,elsebranch,sizeof(elsebranch));
5467 output_line(ctx, "%s:", elsebranch);
5468
5469 // Replace the ELSE label with the ENDIF on the label stack.
5470 ctx->branch_labels_stack[ctx->branch_labels_stack_index-1] = endlabel;
5471 } // else if
5472
5473 else // stock ARB1 has no branching.
5474 {
5475 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5476 } // else
5477 } // emit_ARB1_ELSE
5478
5479
5480 static void emit_ARB1_ENDIF(Context *ctx)
5481 {
5482 // nv2 fragment programs (and everything nv4) have a real ENDIF.
5483 if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5484 output_line(ctx, "ENDIF;");
5485
5486 else if (support_nv2(ctx))
5487 {
5488 // there's no ENDIF construct, but we can use a branch to a label.
5489 assert(ctx->branch_labels_stack_index > 0);
5490 const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
5491 char endbranch[32];
5492 get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
5493 output_line(ctx, "%s:", endbranch);
5494 } // if
5495
5496 else // stock ARB1 has no branching.
5497 {
5498 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5499 } // else
5500 } // emit_ARB1_ENDIF
5501
5502
5503 static void emit_ARB1_BREAK(Context *ctx)
5504 {
5505 // nv2 fragment programs (and everything nv4) have a real BREAK.
5506 if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5507 output_line(ctx, "BRK;");
5508
5509 else if (support_nv2(ctx))
5510 {
5511 // no BREAK, but we can use branches.
5512 assert(ctx->branch_labels_stack_index >= 2);
5513 const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index];
5514 char failbranch[32];
5515 get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
5516 output_line(ctx, "BRA %s;", failbranch);
5517 } // else if
5518
5519 else // stock ARB1 has no branching.
5520 {
5521 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5522 } // else
5523 } // emit_ARB1_BREAK
5524
5525
5526 static void emit_ARB1_MOVA(Context *ctx)
5527 {
5528 // nv2 and nv3 can use the ARR opcode.
5529 // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT.
5530 if (support_nv4(ctx))
5531 emit_ARB1_opcode_ds(ctx, "ROUND.S"); // !!! FIXME: don't use a modifier here.
5532 else if ((support_nv2(ctx)) || (support_nv3(ctx)))
5533 emit_ARB1_opcode_ds(ctx, "ARR");
5534 else
5535 {
5536 char src0[64];
5537 char scratch[64];
5538 char addr[32];
5539
5540 make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5541 allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5542 snprintf(addr, sizeof (addr), "addr%d", ctx->dest_arg.regnum);
5543
5544 // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.
5545
5546 // ARL uses floor(), but D3D expects round-to-nearest.
5547 // There is probably a more efficient way to do this.
5548 if (shader_is_pixel(ctx)) // CMP only exists in fragment programs. :/
5549 output_line(ctx, "CMP %s, %s, -1.0, 1.0;", scratch, src0);
5550 else
5551 {
5552 output_line(ctx, "SLT %s, %s, 0.0;", scratch, src0);
5553 output_line(ctx, "MAD %s, %s, -2.0, 1.0;", scratch, scratch);
5554 } // else
5555
5556 output_line(ctx, "ABS %s, %s;", addr, src0);
5557 output_line(ctx, "ADD %s, %s, 0.5;", addr, addr);
5558 output_line(ctx, "FLR %s, %s;", addr, addr);
5559 output_line(ctx, "MUL %s, %s, %s;", addr, addr, scratch);
5560
5561 // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
5562 // wants to look at dest_arg, not our temp register.
5563 assert(ctx->dest_arg.result_mod == 0);
5564 assert(ctx->dest_arg.result_shift == 0);
5565
5566 // we assign to the actual address register as needed.
5567 ctx->last_address_reg_component = -1;
5568 } // else
5569 } // emit_ARB1_MOVA
5570
5571
5572 static void emit_ARB1_TEXKILL(Context *ctx)
5573 {
5574 // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle.
5575 // We just map the x component to w. If it's negative, the fragment
5576 // would discard anyhow, otherwise, it'll pass through okay. This saves
5577 // us a temp register.
5578 char dst[64];
5579 get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5580 output_line(ctx, "KIL %s.xyzx;", dst);
5581 } // emit_ARB1_TEXKILL
5582
5583 static void arb1_texbem(Context *ctx, const int luminance)
5584 {
5585 // !!! FIXME: this code counts on the register not having swizzles, etc.
5586 const int stage = ctx->dest_arg.regnum;
5587 char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5588 char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src));
5589 char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5590 char sampler[64];
5591 get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage,
5592 sampler, sizeof (sampler));
5593
5594 output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src);
5595 output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp);
5596 output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst);
5597 output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage);
5598
5599 if (luminance) // TEXBEML, not just TEXBEM?
5600 {
5601 output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
5602 tmp, src, sampler, sampler);
5603 output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp);
5604 } // if
5605
5606 emit_ARB1_dest_modifiers(ctx);
5607 } // arb1_texbem
5608
5609 static void emit_ARB1_TEXBEM(Context *ctx)
5610 {
5611 arb1_texbem(ctx, 0);
5612 } // emit_ARB1_TEXBEM
5613
5614 static void emit_ARB1_TEXBEML(Context *ctx)
5615 {
5616 arb1_texbem(ctx, 1);
5617 } // emit_ARB1_TEXBEML
5618
// TEXREG2AR and TEXREG2GB have no arb1 implementation yet; the emitters are
// generated by a macro defined outside this part of the file.
// NOTE(review): presumably they report the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
5621
5622
5623 static void emit_ARB1_TEXM3X2PAD(Context *ctx)
5624 {
5625 // no-op ... work happens in emit_ARB1_TEXM3X2TEX().
5626 } // emit_ARB1_TEXM3X2PAD
5627
5628 static void emit_ARB1_TEXM3X2TEX(Context *ctx)
5629 {
5630 if (ctx->texm3x2pad_src0 == -1)
5631 return;
5632
5633 char dst[64];
5634 char src0[64];
5635 char src1[64];
5636 char src2[64];
5637
5638 // !!! FIXME: this code counts on the register not having swizzles, etc.
5639 const int stage = ctx->dest_arg.regnum;
5640 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
5641 src0, sizeof (src0));
5642 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
5643 src1, sizeof (src1));
5644 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5645 src2, sizeof (src2));
5646 get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5647
5648 output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, dst);
5649 output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5650 output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, dst, stage);
5651 emit_ARB1_dest_modifiers(ctx);
5652 } // emit_ARB1_TEXM3X2TEX
5653
5654
5655 static void emit_ARB1_TEXM3X3PAD(Context *ctx)
5656 {
5657 // no-op ... work happens in emit_ARB1_TEXM3X3*().
5658 } // emit_ARB1_TEXM3X3PAD
5659
5660
5661 static void emit_ARB1_TEXM3X3TEX(Context *ctx)
5662 {
5663 if (ctx->texm3x3pad_src1 == -1)
5664 return;
5665
5666 char dst[64];
5667 char src0[64];
5668 char src1[64];
5669 char src2[64];
5670 char src3[64];
5671 char src4[64];
5672
5673 // !!! FIXME: this code counts on the register not having swizzles, etc.
5674 const int stage = ctx->dest_arg.regnum;
5675 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5676 src0, sizeof (src0));
5677 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5678 src1, sizeof (src1));
5679 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5680 src2, sizeof (src2));
5681 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5682 src3, sizeof (src3));
5683 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5684 src4, sizeof (src4));
5685 get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5686
5687 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
5688 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
5689 const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
5690
5691 output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5692 output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5693 output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5694 output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, dst, stage, ttypestr);
5695 emit_ARB1_dest_modifiers(ctx);
5696 } // emit_ARB1_TEXM3X3TEX
5697
5698 static void emit_ARB1_TEXM3X3SPEC(Context *ctx)
5699 {
5700 if (ctx->texm3x3pad_src1 == -1)
5701 return;
5702
5703 char dst[64];
5704 char src0[64];
5705 char src1[64];
5706 char src2[64];
5707 char src3[64];
5708 char src4[64];
5709 char src5[64];
5710 char tmp[64];
5711 char tmp2[64];
5712
5713 // !!! FIXME: this code counts on the register not having swizzles, etc.
5714 const int stage = ctx->dest_arg.regnum;
5715 allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5716 allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
5717 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5718 src0, sizeof (src0));
5719 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5720 src1, sizeof (src1));
5721 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5722 src2, sizeof (src2));
5723 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5724 src3, sizeof (src3));
5725 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5726 src4, sizeof (src4));
5727 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
5728 src5, sizeof (src5));
5729 get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5730
5731 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
5732 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
5733 const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
5734
5735 output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5736 output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5737 output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5738 output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal
5739 output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, src5); // normal * eyeray
5740
5741 // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
5742 // !!! FIXME: divides or reciprocals...right?
5743 output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
5744 output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
5745 output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
5746 output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
5747 output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
5748
5749 output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
5750 output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, src5);
5751 output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
5752 emit_ARB1_dest_modifiers(ctx);
5753 } // emit_ARB1_TEXM3X3SPEC
5754
5755 static void emit_ARB1_TEXM3X3VSPEC(Context *ctx)
5756 {
5757 if (ctx->texm3x3pad_src1 == -1)
5758 return;
5759
5760 char dst[64];
5761 char src0[64];
5762 char src1[64];
5763 char src2[64];
5764 char src3[64];
5765 char src4[64];
5766 char tmp[64];
5767 char tmp2[64];
5768 char tmp3[64];
5769
5770 // !!! FIXME: this code counts on the register not having swizzles, etc.
5771 const int stage = ctx->dest_arg.regnum;
5772 allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5773 allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
5774 allocate_ARB1_scratch_reg_name(ctx, tmp3, sizeof (tmp3));
5775 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5776 src0, sizeof (src0));
5777 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5778 src1, sizeof (src1));
5779 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5780 src2, sizeof (src2));
5781 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5782 src3, sizeof (src3));
5783 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5784 src4, sizeof (src4));
5785 get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5786
5787 RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
5788 const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
5789 const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
5790
5791 output_line(ctx, "MOV %s.x, %s.w;", tmp3, src0);
5792 output_line(ctx, "MOV %s.y, %s.w;", tmp3, src2);
5793 output_line(ctx, "MOV %s.z, %s.w;", tmp3, dst);
5794 output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5795 output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5796 output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5797 output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst); // normal * normal
5798 output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, tmp3); // normal * eyeray
5799
5800 // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
5801 // !!! FIXME: divides or reciprocals...right?
5802 output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
5803 output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
5804 output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
5805 output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
5806 output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
5807
5808 output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
5809 output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, tmp3);
5810 output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
5811 emit_ARB1_dest_modifiers(ctx);
5812 } // emit_ARB1_TEXM3X3VSPEC
5813
5814 static void emit_ARB1_EXPP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
5815 static void emit_ARB1_LOGP(Context *ctx) { arb1_log(ctx, "LG2"); }
5816
5817 static void emit_ARB1_CND(Context *ctx)
5818 {
5819 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5820 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5821 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5822 char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5823 char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5824
5825 // CND compares against 0.5, but we need to compare against 0.0...
5826 // ...subtract to make up the difference.
5827 output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", tmp, src0);
5828 // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
5829 // switch src1 and src2 to get the same results.
5830 output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src2, src1);
5831 emit_ARB1_dest_modifiers(ctx);
5832 } // emit_ARB1_CND
5833
// No ARB1 translation is written for these opcodes. The macro is defined
//  outside this section; presumably it generates an emit_ARB1_<op>() stub
//  that reports the opcode as unimplemented -- confirm at its definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
5838
5839 static void emit_ARB1_TEXM3X3(Context *ctx)
5840 {
5841 if (ctx->texm3x3pad_src1 == -1)
5842 return;
5843
5844 char dst[64];
5845 char src0[64];
5846 char src1[64];
5847 char src2[64];
5848 char src3[64];
5849 char src4[64];
5850
5851 // !!! FIXME: this code counts on the register not having swizzles, etc.
5852 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5853 src0, sizeof (src0));
5854 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5855 src1, sizeof (src1));
5856 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5857 src2, sizeof (src2));
5858 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5859 src3, sizeof (src3));
5860 get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5861 src4, sizeof (src4));
5862 get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5863
5864 output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5865 output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5866 output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5867 output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst);
5868 emit_ARB1_dest_modifiers(ctx);
5869 } // emit_ARB1_TEXM3X3
5870
// No ARB1 translation is written for TEXDEPTH. The macro is defined outside
//  this section; presumably it generates a stub that reports the opcode as
//  unimplemented -- confirm at its definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
5872
5873 static void emit_ARB1_CMP(Context *ctx)
5874 {
5875 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5876 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5877 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5878 char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5879 // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
5880 // switch src1 and src2 to get the same results.
5881 output_line(ctx, "CMP%s, %s, %s, %s;", dst, src0, src2, src1);
5882 emit_ARB1_dest_modifiers(ctx);
5883 } // emit_ARB1_CMP
5884
// No ARB1 translation is written for BEM. The macro is defined outside this
//  section; presumably it generates a stub that reports the opcode as
//  unimplemented -- confirm at its definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM)
5886
5887
5888 static void emit_ARB1_DP2ADD(Context *ctx)
5889 {
5890 if (support_nv4(ctx)) // nv4 has a built-in equivalent to DP2ADD.
5891 emit_ARB1_opcode_dsss(ctx, "DP2A");
5892 else
5893 {
5894 char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5895 char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5896 char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5897 char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5898 char scratch[64];
5899
5900 // DP2ADD is:
5901 // dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
5902 allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5903 output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1);
5904 output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch);
5905 output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2);
5906 emit_ARB1_dest_modifiers(ctx);
5907 } // else
5908 } // emit_ARB1_DP2ADD
5909
5910
5911 static void emit_ARB1_DSX(Context *ctx)
5912 {
5913 if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSX.
5914 emit_ARB1_opcode_ds(ctx, "DDX");
5915 else
5916 failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
5917 } // emit_ARB1_DSX
5918
5919
5920 static void emit_ARB1_DSY(Context *ctx)
5921 {
5922 if (support_nv2(ctx)) // nv2 has a built-in equivalent to DSY.
5923 emit_ARB1_opcode_ds(ctx, "DDY");
5924 else
5925 failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
5926 } // emit_ARB1_DSY
5927
// Shared worker for the ARB1 texture-load emitters in this file.
//  opcode: the ARB1 instruction name to emit (callers here pass "TEX",
//          "TXP", "TXB", "TXL" or "TXD").
//  texldd: non-zero to also emit source args 2 and 3 as extra operands
//          (callers here only set it together with "TXD").
// Fails (and emits nothing) if the sampler was never declared or has an
//  unknown texture type.
static void arb1_texld(Context *ctx, const char *opcode, const int texldd)
{
    // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly.
    // (strip the partial-precision flag from the dest before it is
    //  formatted below.)
    if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx)))
        ctx->dest_arg.result_mod &= ~MOD_PP;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));

    // Before shader version 1.4 the sampler stage and the coordinate both
    //  come from the destination register; from 1.4 on, the coordinate is
    //  source arg 0 and the sampler is source arg 1.
    const int sm1 = !shader_version_atleast(ctx, 1, 4);
    const int regnum = sm1 ? ctx->dest_arg.regnum : ctx->source_args[1].regnum;
    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum);

    const char *ttype = NULL;
    char src0[64];
    if (sm1)
        get_ARB1_destarg_varname(ctx, src0, sizeof (src0));
    else
        get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
    //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));  // !!! FIXME: SRC_MOD?

    char src2[64] = { 0 };
    char src3[64] = { 0 };

    if (texldd)
    {
        make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
        make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3));
    } // if

    // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
    if (sreg == NULL)
    {
        fail(ctx, "TEXLD using undeclared sampler");
        return;
    } // if

    // SM1 only specifies dst, so don't check swizzle there.
    if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) )
    {
        // !!! FIXME: does this ever actually happen?
        // (note: this reports the failure but does not return; the
        //  instruction below is still emitted.)
        fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");
    } // if

    // The sampler's register entry stores its texture type in "index".
    switch ((const TextureType) sreg->index)
    {
        case TEXTURE_TYPE_2D: ttype = "2D"; break;  // !!! FIXME: "RECT"?
        case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break;
        case TEXTURE_TYPE_VOLUME: ttype = "3D"; break;
        default: fail(ctx, "unknown texture type"); return;
    } // switch

    if (texldd)
    {
        output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst,
                    src0, src2, src3, regnum, ttype);
    } // if
    else
    {
        output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0,
                    regnum, ttype);
    } // else
} // arb1_texld
5990
5991
5992 static void emit_ARB1_TEXLDD(Context *ctx)
5993 {
5994 // With GL_NV_fragment_program2, we can use the TXD opcode.
5995 // In stock arb1, we can settle for a standard texld, which isn't
5996 // perfect, but oh well.
5997 if (support_nv2(ctx))
5998 arb1_texld(ctx, "TXD", 1);
5999 else
6000 arb1_texld(ctx, "TEX", 0);
6001 } // emit_ARB1_TEXLDD
6002
6003
6004 static void emit_ARB1_TEXLDL(Context *ctx)
6005 {
6006 if ((shader_is_vertex(ctx)) && (!support_nv3(ctx)))
6007 {
6008 failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
6009 ctx->profile->name);
6010 return;
6011 } // if
6012
6013 else if ((shader_is_pixel(ctx)) && (!support_nv2(ctx)))
6014 {
6015 failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
6016 ctx->profile->name);
6017 return;
6018 } // if
6019
6020 // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
6021 arb1_texld(ctx, "TXL", 0);
6022 } // emit_ARB1_TEXLDL
6023
6024
// No ARB1 translation is written for these opcodes. The macro is defined
//  outside this section; presumably it generates an emit_ARB1_<op>() stub
//  that reports the opcode as unimplemented -- confirm at its definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
6027
6028 static void emit_ARB1_IFC(Context *ctx)
6029 {
6030 if (support_nv2(ctx))
6031 {
6032 static const char *comps[] = {
6033 "", "SGTC", "SEQC", "SGEC", "SGTC", "SNEC", "SLEC"
6034 };
6035
6036 if (ctx->instruction_controls >= STATICARRAYLEN(comps))
6037 {
6038 fail(ctx, "unknown comparison control");
6039 return;
6040 } // if
6041
6042 char src0[64];
6043 char src1[64];
6044 char scratch[64];
6045
6046 const char *comp = comps[ctx->instruction_controls];
6047 get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
6048 get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
6049 allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
6050 output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
6051 nv2_if(ctx);
6052 } // if
6053
6054 else // stock ARB1 has no branching.
6055 {
6056 failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
6057 } // else
6058 } // emit_ARB1_IFC
6059
6060
// No ARB1 translation is written for SETP. The macro is defined outside
//  this section; presumably it generates a stub that reports the opcode as
//  unimplemented -- confirm at its definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
6062
6063 static void emit_ARB1_DEF(Context *ctx)
6064 {
6065 const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
6066 char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
6067 char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
6068 char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
6069 char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
6070 char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
6071
6072 push_output(ctx, &ctx->globals);
6073 output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
6074 dst, val0, val1, val2, val3);
6075 pop_output(ctx);
6076 } // emit_ARB1_DEF
6077
6078 static void emit_ARB1_DEFI(Context *ctx)
6079 {
6080 char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
6081 const int32 *x = (const int32 *) ctx->dwords;
6082 push_output(ctx, &ctx->globals);
6083 output_line(ctx, "PARAM %s = { %d, %d, %d, %d };",
6084 dst, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
6085 pop_output(ctx);
6086 } // emit_ARB1_DEFI
6087
6088 static void emit_ARB1_DEFB(Context *ctx)
6089 {
6090 char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
6091 push_output(ctx, &ctx->globals);
6092 output_line(ctx, "PARAM %s = %d;", dst, ctx->dwords[0] ? 1 : 0);
6093 pop_output(ctx);
6094 } // emit_ARB1_DEFB
6095
6096 static void emit_ARB1_DCL(Context *ctx)
6097 {
6098 // no-op. We do this in our emit_attribute() and emit_uniform().
6099 } // emit_ARB1_DCL
6100
// No ARB1 translation is written for TEXCRD. The macro is defined outside
//  this section; presumably it generates a stub that reports the opcode as
//  unimplemented -- confirm at its definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
6102
6103 static void emit_ARB1_TEXLD(Context *ctx)
6104 {
6105 if (!shader_version_atleast(ctx, 1, 4))
6106 {
6107 arb1_texld(ctx, "TEX", 0);
6108 return;
6109 } // if
6110
6111 else if (!shader_version_atleast(ctx, 2, 0))
6112 {
6113 // ps_1_4 is different, too!
6114 fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME
6115 return;
6116 } // if
6117
6118 // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly?
6119 if (ctx->instruction_controls == CONTROL_TEXLD)
6120 arb1_texld(ctx, "TEX", 0);
6121 else if (ctx->instruction_controls == CONTROL_TEXLDP)
6122 arb1_texld(ctx, "TXP", 0);
6123 else if (ctx->instruction_controls == CONTROL_TEXLDB)
6124 arb1_texld(ctx, "TXB", 0);
6125 } // emit_ARB1_TEXLD
6126
6127 #endif // SUPPORT_PROFILE_ARB1
6128
6129
6130 #if !AT_LEAST_ONE_PROFILE
6131 #error No profiles are supported. Fix your build.
6132 #endif
6133
// Builds one initializer for the profiles[] table below: the profile's name
//  string (MOJOSHADER_PROFILE_<prof>) followed by that profile's emit_* and
//  get_* callbacks. The initializer is positional, so the entries here must
//  stay in the same order as the fields of the Profile struct (declared
//  elsewhere).
#define DEFINE_PROFILE(prof) { \
    MOJOSHADER_PROFILE_##prof, \
    emit_##prof##_start, \
    emit_##prof##_end, \
    emit_##prof##_phase, \
    emit_##prof##_global, \
    emit_##prof##_array, \
    emit_##prof##_const_array, \
    emit_##prof##_uniform, \
    emit_##prof##_sampler, \
    emit_##prof##_attribute, \
    emit_##prof##_finalize, \
    get_##prof##_varname, \
    get_##prof##_const_array_varname, \
},

// Every profile compiled into this build, in a fixed order (D3D, BYTECODE,
//  GLSL, ARB1). PROFILE_EMITTERS relies on this exact ordering.
static const Profile profiles[] =
{
#if SUPPORT_PROFILE_D3D
    DEFINE_PROFILE(D3D)
#endif
#if SUPPORT_PROFILE_BYTECODE
    DEFINE_PROFILE(BYTECODE)
#endif
#if SUPPORT_PROFILE_GLSL
    DEFINE_PROFILE(GLSL)
#endif
#if SUPPORT_PROFILE_ARB1
    DEFINE_PROFILE(ARB1)
#endif
};

#undef DEFINE_PROFILE
6167
// This is for profiles that extend other profiles...
//  Each entry pairs the name of an extended profile ("from") with the name
//  of the base profile in profiles[] ("to") that it builds on. Presumably a
//  requested profile name is translated through this table before the
//  profiles[] lookup -- confirm at the place it is used.
static const struct { const char *from; const char *to; } profileMap[] =
{
    { MOJOSHADER_PROFILE_GLSL120, MOJOSHADER_PROFILE_GLSL },
    { MOJOSHADER_PROFILE_NV2, MOJOSHADER_PROFILE_ARB1 },
    { MOJOSHADER_PROFILE_NV3, MOJOSHADER_PROFILE_ARB1 },
    { MOJOSHADER_PROFILE_NV4, MOJOSHADER_PROFILE_ARB1 },
};
6176
6177
// Expands to a braced initializer holding opcode "op"'s emitter for each
//  profile, one PROFILE_EMITTER_* item per profile.
// The PROFILE_EMITTER_* items MUST be in the same order as profiles[]!
#define PROFILE_EMITTERS(op) { \
     PROFILE_EMITTER_D3D(op) \
     PROFILE_EMITTER_BYTECODE(op) \
     PROFILE_EMITTER_GLSL(op) \
     PROFILE_EMITTER_ARB1(op) \
}
6185
// Decode the destination-parameter token at ctx->tokens into *info,
//  consume it, and validate it against the current shader type/version.
//  Validation problems are reported through fail(); parsing continues so
//  that several problems can be reported for one token.
// Returns 0 if there were no tokens left, 2 if the token requested relative
//  addressing (which is reported as unsupported), and 1 otherwise.
static int parse_destination_token(Context *ctx, DestArgInfo *info)
{
    // !!! FIXME: recheck against the spec for ranges (like RASTOUT values, etc).
    if (ctx->tokencount == 0)
    {
        fail(ctx, "Out of tokens in destination parameter");
        return 0;
    } // if

    const uint32 token = SWAP32(*(ctx->tokens));
    const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
    const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31

    info->token = ctx->tokens;
    info->regnum = (int) (token & 0x7ff);  // bits 0 through 10
    info->relative = (int) ((token >> 13) & 0x1); // bit 13
    info->orig_writemask = (int) ((token >> 16) & 0xF); // bits 16 through 19
    info->result_mod = (int) ((token >> 20) & 0xF); // bits 20 through 23
    info->result_shift = (int) ((token >> 24) & 0xF); // bits 24 through 27
    // The register type is split in two: its low three bits sit in token
    //  bits 28-30, its high two bits in token bits 11-12.
    info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18));  // bits 28-30, 11-12

    // Scalar registers can only write .x, whatever the token's mask says.
    int writemask;
    if (isscalar(ctx, ctx->shader_type, info->regtype, info->regnum))
        writemask = 0x1;  // just x.
    else
        writemask = info->orig_writemask;

    set_dstarg_writemask(info, writemask);  // bits 16 through 19.

    // all the REG_TYPE_CONSTx types are the same register type, it's just
    //  split up so its regnum can be > 2047 in the bytecode. Clean it up.
    if (info->regtype == REG_TYPE_CONST2)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 2048;
    } // if
    else if (info->regtype == REG_TYPE_CONST3)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 4096;
    } // else if
    else if (info->regtype == REG_TYPE_CONST4)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 6144;
    } // else if

    // swallow token for now, for multiple calls in a row.
    adjust_token_position(ctx, 1);

    if (reserved1 != 0x0)
        fail(ctx, "Reserved bit #1 in destination token must be zero");

    if (reserved2 != 0x1)
        fail(ctx, "Reserved bit #2 in destination token must be one");

    if (info->relative)
    {
        if (!shader_is_vertex(ctx))
            fail(ctx, "Relative addressing in non-vertex shader");
        if (!shader_version_atleast(ctx, 3, 0))
            fail(ctx, "Relative addressing in vertex shader version < 3.0");
        if ((!ctx->ctab.have_ctab) && (!ctx->ignores_ctab))
        {
            // it's hard to do this efficiently without!
            fail(ctx, "relative addressing unsupported without a CTAB");
        } // if

        // !!! FIXME: I don't have a shader that has a relative dest currently.
        // (note: only one token has been consumed at this point, even
        //  though 2 is returned.)
        fail(ctx, "Relative addressing of dest tokens is unsupported");
        return 2;
    } // if

    // Result shift is only legal in pixel shaders below version 2.0, and
    //  only with values 1-3 or 13-15.
    const int s = info->result_shift;
    if (s != 0)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Result shift scale in non-pixel shader");
        if (shader_version_atleast(ctx, 2, 0))
            fail(ctx, "Result shift scale in pixel shader version >= 2.0");
        if ( ! (((s >= 1) && (s <= 3)) || ((s >= 0xD) && (s <= 0xF))) )
            fail(ctx, "Result shift scale isn't 1 to 3, or 13 to 15.");
    } // if

    if (info->result_mod & MOD_PP)  // Partial precision (pixel shaders only)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Partial precision result mod in non-pixel shader");
    } // if

    if (info->result_mod & MOD_CENTROID)  // Centroid (pixel shaders only)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Centroid result mod in non-pixel shader");
        else if (!ctx->centroid_allowed)  // only on DCL opcodes!
            fail(ctx, "Centroid modifier not allowed here");
    } // if

    if ((info->regtype < 0) || (info->regtype > REG_TYPE_MAX))
        fail(ctx, "Register type is out of range");

    // Only record the register as used if nothing above failed.
    if (!isfail(ctx))
        set_used_register(ctx, info->regtype, info->regnum, 1);

    return 1;
} // parse_destination_token
6292
6293
// Sort ctx->constants by register index (relinking the list in that order),
//  then find every run of two or more float constants with consecutive
//  indices and prepend a VariableList entry describing that run to
//  ctx->variables. Runs only once per context; later calls return
//  immediately. Reports a failure if the list is shorter than
//  ctx->constant_count.
static void determine_constants_arrays(Context *ctx)
{
    // Only process this stuff once. This is called after all DEF* opcodes
    //  could have been parsed.
    if (ctx->determined_constants_arrays)
        return;

    ctx->determined_constants_arrays = 1;

    if (ctx->constant_count <= 1)
        return;  // nothing to sort or group.

    // Sort the linked list into an array for easier tapdancing...
    //  (one extra slot holds a NULL sentinel after the last constant.)
    ConstantsList **array = (ConstantsList **) alloca(sizeof (ConstantsList *) * (ctx->constant_count + 1));
    ConstantsList *item = ctx->constants;
    int i;

    for (i = 0; i < ctx->constant_count; i++)
    {
        if (item == NULL)
        {
            fail(ctx, "BUG: mismatched constant list and count");
            return;
        } // if

        array[i] = item;
        item = item->next;
    } // for

    array[ctx->constant_count] = NULL;

    // bubble sort ftw.
    int sorted;
    do
    {
        sorted = 1;
        for (i = 0; i < ctx->constant_count-1; i++)
        {
            if (array[i]->constant.index > array[i+1]->constant.index)
            {
                ConstantsList *tmp = array[i];
                array[i] = array[i+1];
                array[i+1] = tmp;
                sorted = 0;
            } // if
        } // for
    } while (!sorted);

    // okay, sorted. While we're here, let's redo the linked list in order...
    //  (the sentinel makes the last item's "next" NULL.)
    for (i = 0; i < ctx->constant_count; i++)
        array[i]->next = array[i+1];
    ctx->constants = array[0];

    // now figure out the groupings of constants and add to ctx->variables...
    int start = -1;  // array slot where the current run began.
    int prev = -1;   // array slot of the last float constant examined.
    int count = 0;   // how many constants extended the current run.
    const int hi = ctx->constant_count;
    for (i = 0; i <= hi; i++)  // <= so the NULL sentinel flushes the last run.
    {
        if (array[i] && (array[i]->constant.type != MOJOSHADER_UNIFORM_FLOAT))
            continue;  // we only care about REG_TYPE_CONST for array groups.

        if (start == -1)
        {
            prev = start = i;  // first REG_TYPE_CONST we've seen. Mark it!
            continue;
        } // if

        // not a match (or last item in the array)...see if we had a
        //  contiguous set before this point...
        if ( (array[i]) && (array[i]->constant.index == (array[prev]->constant.index + 1)) )
            count++;
        else
        {
            if (count > 0)  // multiple constants in the set?
            {
                VariableList *var;
                var = (VariableList *) Malloc(ctx, sizeof (VariableList));
                if (var == NULL)
                    break;

                var->type = MOJOSHADER_UNIFORM_FLOAT;
                var->index = array[start]->constant.index;
                var->count = (array[prev]->constant.index - var->index) + 1;
                var->constant = array[start];
                var->used = 0;
                var->emit_position = -1;
                var->next = ctx->variables;
                ctx->variables = var;
            } // if

            start = i;   // set this as new start of sequence.

            // The new run has no extensions yet. Without this reset, every
            //  isolated constant after the first real group would be
            //  reported as a one-element array.
            count = 0;
        } // else

        prev = i;
    } // for
} // determine_constants_arrays
6392
6393
adjust_swizzle(const Context * ctx,const RegisterType regtype,const int regnum,const int swizzle)6394 static int adjust_swizzle(const Context *ctx, const RegisterType regtype,
6395 const int regnum, const int swizzle)
6396 {
6397 if (regtype != REG_TYPE_INPUT) // !!! FIXME: maybe lift this later?
6398 return swizzle;
6399 else if (ctx->swizzles_count == 0)
6400 return swizzle;
6401
6402 const RegisterList *reg = reglist_find(&ctx->attributes, regtype, regnum);
6403 if (reg == NULL)
6404 return swizzle;
6405
6406 size_t i;
6407 for (i = 0; i < ctx->swizzles_count; i++)
6408 {
6409 const MOJOSHADER_swizzle *swiz = &ctx->swizzles[i];
6410 if ((swiz->usage == reg->usage) && (swiz->index == reg->index))
6411 {
6412 return ( (((int)(swiz->swizzles[((swizzle >> 0) & 0x3)])) << 0) |
6413 (((int)(swiz->swizzles[((swizzle >> 2) & 0x3)])) << 2) |
6414 (((int)(swiz->swizzles[((swizzle >> 4) & 0x3)])) << 4) |
6415 (((int)(swiz->swizzles[((swizzle >> 6) & 0x3)])) << 6) );
6416 } // if
6417 } // for
6418
6419 return swizzle;
6420 } // adjust_swizzle
6421
6422
parse_source_token(Context * ctx,SourceArgInfo * info)6423 static int parse_source_token(Context *ctx, SourceArgInfo *info)
6424 {
6425 int retval = 1;
6426
6427 if (ctx->tokencount == 0)
6428 {
6429 fail(ctx, "Out of tokens in source parameter");
6430 return 0;
6431 } // if
6432
6433 const uint32 token = SWAP32(*(ctx->tokens));
6434 const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
6435 const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31
6436
6437 info->token = ctx->tokens;
6438 info->regnum = (int) (token & 0x7ff); // bits 0 through 10
6439 info->relative = (int) ((token >> 13) & 0x1); // bit 13
6440 const int swizzle = (int) ((token >> 16) & 0xFF); // bits 16 through 23
6441 info->src_mod = (SourceMod) ((token >> 24) & 0xF); // bits 24 through 27
6442 info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18)); // bits 28-30, 11-12
6443
6444 // all the REG_TYPE_CONSTx types are the same register type, it's just
6445 // split up so its regnum can be > 2047 in the bytecode. Clean it up.
6446 if (info->regtype == REG_TYPE_CONST2)
6447 {
6448 info->regtype = REG_TYPE_CONST;
6449 info->regnum += 2048;
6450 } // else if
6451 else if (info->regtype == REG_TYPE_CONST3)
6452 {
6453 info->regtype = REG_TYPE_CONST;
6454 info->regnum += 4096;
6455 } // else if
6456 else if (info->regtype == REG_TYPE_CONST4)
6457 {
6458 info->regtype = REG_TYPE_CONST;
6459 info->regnum += 6144;
6460 } // else if
6461
6462 info->swizzle = adjust_swizzle(ctx, info->regtype, info->regnum, swizzle);
6463 info->swizzle_x = ((info->swizzle >> 0) & 0x3);
6464 info->swizzle_y = ((info->swizzle >> 2) & 0x3);
6465 info->swizzle_z = ((info->swizzle >> 4) & 0x3);
6466 info->swizzle_w = ((info->swizzle >> 6) & 0x3);
6467
6468 // swallow token for now, for multiple calls in a row.
6469 adjust_token_position(ctx, 1);
6470
6471 if (reserved1 != 0x0)
6472 fail(ctx, "Reserved bits #1 in source token must be zero");
6473
6474 if (reserved2 != 0x1)
6475 fail(ctx, "Reserved bit #2 in source token must be one");
6476
6477 if ((info->relative) && (ctx->tokencount == 0))
6478 {
6479 fail(ctx, "Out of tokens in relative source parameter");
6480 info->relative = 0; // don't try to process it.
6481 } // if
6482
6483 if (info->relative)
6484 {
6485 if ( (shader_is_pixel(ctx)) && (!shader_version_atleast(ctx, 3, 0)) )
6486 fail(ctx, "Relative addressing in pixel shader version < 3.0");
6487
6488 const uint32 reltoken = SWAP32(*(ctx->tokens));
6489 // swallow token for now, for multiple calls in a row.
6490 adjust_token_position(ctx, 1);
6491
6492 const int relswiz = (int) ((reltoken >> 16) & 0xFF);
6493 info->relative_regnum = (int) (reltoken & 0x7ff);
6494 info->relative_regtype = (RegisterType)
6495 (((reltoken >> 28) & 0x7) |
6496 ((reltoken >> 8) & 0x18));
6497
6498 if (((reltoken >> 31) & 0x1) == 0)
6499 fail(ctx, "bit #31 in relative address must be set");
6500
6501 if ((reltoken & 0xF00E000) != 0) // usused bits.
6502 fail(ctx, "relative address reserved bit must be zero");
6503
6504 switch (info->relative_regtype)
6505 {
6506 case REG_TYPE_LOOP:
6507 case REG_TYPE_ADDRESS:
6508 break;
6509 default:
6510 fail(ctx, "invalid register for relative address");
6511 break;
6512 } // switch
6513
6514 if (info->relative_regnum != 0) // true for now.
6515 fail(ctx, "invalid register for relative address");
6516
6517 if (!replicate_swizzle(relswiz))
6518 fail(ctx, "relative address needs replicate swizzle");
6519
6520 info->relative_component = (relswiz & 0x3);
6521
6522 if (info->regtype == REG_TYPE_INPUT)
6523 {
6524 if ( (shader_is_pixel(ctx)) || (!shader_version_atleast(ctx, 3, 0)) )
6525 fail(ctx, "relative addressing of input registers not supported in this shader model");
6526 ctx->have_relative_input_registers = 1;
6527 } // if
6528 else if (info->regtype == REG_TYPE_CONST)
6529 {
6530 // figure out what array we're in...
6531 if (!ctx->ignores_ctab)
6532 {
6533 if (!ctx->ctab.have_ctab) // hard to do efficiently without!
6534 fail(ctx, "relative addressing unsupported without a CTAB");
6535 else
6536 {
6537 determine_constants_arrays(ctx);
6538
6539 VariableList *var;
6540 const int reltarget = info->regnum;
6541 for (var = ctx->variables; var != NULL; var = var->next)
6542 {
6543 const int lo = var->index;
6544 if ( (reltarget >= lo) && (reltarget < (lo + var->count)) )
6545 break; // match!
6546 } // for
6547
6548 if (var == NULL)
6549 fail(ctx, "relative addressing of indeterminate array");
6550 else
6551 {
6552 var->used = 1;
6553 info->relative_array = var;
6554 set_used_register(ctx, info->relative_regtype, info->relative_regnum, 0);
6555 } // else
6556 } // else
6557 } // if
6558 } // else if
6559 else
6560 {
6561 fail(ctx, "relative addressing of invalid register");
6562 } // else
6563
6564 retval++;
6565 } // if
6566
6567 switch (info->src_mod)
6568 {
6569 case SRCMOD_NONE:
6570 case SRCMOD_ABSNEGATE:
6571 case SRCMOD_ABS:
6572 case SRCMOD_NEGATE:
6573 break; // okay in any shader model.
6574
6575 // apparently these are only legal in Shader Model 1.x ...
6576 case SRCMOD_BIASNEGATE:
6577 case SRCMOD_BIAS:
6578 case SRCMOD_SIGNNEGATE:
6579 case SRCMOD_SIGN:
6580 case SRCMOD_COMPLEMENT:
6581 case SRCMOD_X2NEGATE:
6582 case SRCMOD_X2:
6583 case SRCMOD_DZ:
6584 case SRCMOD_DW:
6585 if (shader_version_atleast(ctx, 2, 0))
6586 fail(ctx, "illegal source mod for this Shader Model.");
6587 break;
6588
6589 case SRCMOD_NOT: // !!! FIXME: I _think_ this is right...
6590 if (shader_version_atleast(ctx, 2, 0))
6591 {
6592 if (info->regtype != REG_TYPE_PREDICATE)
6593 fail(ctx, "NOT only allowed on predicate register.");
6594 } // if
6595 break;
6596
6597 default:
6598 fail(ctx, "Unknown source modifier");
6599 } // switch
6600
6601 // !!! FIXME: docs say this for sm3 ... check these!
6602 // "The negate modifier cannot be used on second source register of these
6603 // instructions: m3x2 - ps, m3x3 - ps, m3x4 - ps, m4x3 - ps, and
6604 // m4x4 - ps."
6605 // "If any version 3 shader reads from one or more constant float
6606 // registers (c#), one of the following must be true.
6607 // All of the constant floating-point registers must use the abs modifier.
6608 // None of the constant floating-point registers can use the abs modifier.
6609
6610 if (!isfail(ctx))
6611 {
6612 RegisterList *reg;
6613 reg = set_used_register(ctx, info->regtype, info->regnum, 0);
6614 // !!! FIXME: this test passes if you write to the register
6615 // !!! FIXME: in this same instruction, because we parse the
6616 // !!! FIXME: destination token first.
6617 // !!! FIXME: Microsoft's shader validation explicitly checks temp
6618 // !!! FIXME: registers for this...do they check other writable ones?
6619 if ((info->regtype == REG_TYPE_TEMP) && (reg) && (!reg->written))
6620 failf(ctx, "Temp register r%d used uninitialized", info->regnum);
6621 } // if
6622
6623 return retval;
6624 } // parse_source_token
6625
6626
// Parse the predicate register token of a predicated instruction into
// ctx->predicate_arg and validate it: it must be a predicate register,
// carry either no source modifier or NOT, use either no swizzle or a
// replicate swizzle, and must not use relative addressing.
// Always returns 1, whatever parse_source_token() reported.
static int parse_predicated_token(Context *ctx)
{
    SourceArgInfo *pred = &ctx->predicate_arg;

    parse_source_token(ctx, pred);

    if (pred->regtype != REG_TYPE_PREDICATE)
        fail(ctx, "Predicated instruction but not predicate register!");

    switch (pred->src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_NOT:
            break;  // the only modifiers accepted here.
        default:
            fail(ctx, "Predicated instruction register is not NONE or NOT");
            break;
    } // switch

    if (!(no_swizzle(pred->swizzle) || replicate_swizzle(pred->swizzle)))
        fail(ctx, "Predicated instruction register has wrong swizzle");

    if (pred->relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in predicated token");

    return 1;
} // parse_predicated_token
6642
6643
// Argument parser for opcodes that take no arguments: nothing is read
// from the token stream, and the result is always 1.
static int parse_args_NULL(Context *ctx)
{
    (void) ctx;  // intentionally unused.
    return 1;
} // parse_args_NULL
6648
6649
// Parse the arguments of a DEF instruction: a destination token that must
// name a CONST register without relative addressing, then four dwords that
// are byteswapped into ctx->dwords[0..3] for state_DEF() to pick up.
// (Presumably parse_destination_token() has moved ctx->tokens past the
// destination token, so tokens[0..3] are the literal values - confirm.)
// Always returns 6.
static int parse_args_DEF(Context *ctx)
{
    int i;

    parse_destination_token(ctx, &ctx->dest_arg);

    if (ctx->dest_arg.regtype != REG_TYPE_CONST)
        fail(ctx, "DEF using non-CONST register");

    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DEF");

    for (i = 0; i < 4; i++)
        ctx->dwords[i] = SWAP32(ctx->tokens[i]);

    return 6;
} // parse_args_DEF
6665
6666
// Parse the arguments of a DEFI instruction: a destination token that must
// name a CONSTINT register without relative addressing, then four dwords
// that are byteswapped into ctx->dwords[0..3] for state_DEFI() to pick up.
// (Presumably parse_destination_token() has moved ctx->tokens past the
// destination token, so tokens[0..3] are the literal values - confirm.)
// Always returns 6.
static int parse_args_DEFI(Context *ctx)
{
    parse_destination_token(ctx, &ctx->dest_arg);
    if (ctx->dest_arg.regtype != REG_TYPE_CONSTINT)
        fail(ctx, "DEFI using non-CONSTINT register");  // was misspelled "CONSTING".
    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DEFI");

    ctx->dwords[0] = SWAP32(ctx->tokens[0]);
    ctx->dwords[1] = SWAP32(ctx->tokens[1]);
    ctx->dwords[2] = SWAP32(ctx->tokens[2]);
    ctx->dwords[3] = SWAP32(ctx->tokens[3]);

    return 6;
} // parse_args_DEFI
6682
6683
// Parse the arguments of a DEFB instruction: a destination token that must
// name a CONSTBOOL register without relative addressing, then one dword
// that is collapsed to 0 or 1 in ctx->dwords[0]. No byteswap is applied;
// only zero/non-zero matters here. Always returns 3.
static int parse_args_DEFB(Context *ctx)
{
    parse_destination_token(ctx, &ctx->dest_arg);

    if (ctx->dest_arg.regtype != REG_TYPE_CONSTBOOL)
        fail(ctx, "DEFB using non-CONSTBOOL register");

    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DEFB");

    if (ctx->tokens[0])
        ctx->dwords[0] = 1;
    else
        ctx->dwords[0] = 0;

    return 3;
} // parse_args_DEFB
6696
6697
valid_texture_type(const uint32 ttype)6698 static int valid_texture_type(const uint32 ttype)
6699 {
6700 switch ((const TextureType) ttype)
6701 {
6702 case TEXTURE_TYPE_2D:
6703 case TEXTURE_TYPE_CUBE:
6704 case TEXTURE_TYPE_VOLUME:
6705 return 1; // it's okay.
6706 } // switch
6707
6708 return 0;
6709 } // valid_texture_type
6710
6711
6712 // !!! FIXME: this function is kind of a mess.
// Parse the arguments of a DCL instruction: the declaration dword followed
// by a destination register token (parsed into ctx->dest_arg with centroid
// temporarily allowed).
//
// What gets stored for state_DCL() depends on the declared register:
//  - attribute-style registers: ctx->dwords[0] = usage, ctx->dwords[1] = index.
//  - samplers: ctx->dwords[0] = texture type.
// Which register types are accepted depends on the shader type and version;
// everything else is reported through fail(). Bits of the declaration dword
// that the selected case treats as reserved must be zero.
//
// Always returns 3.
static int parse_args_DCL(Context *ctx)
{
    const uint32 token = SWAP32(*(ctx->tokens));

    // Bitfields of the declaration dword; each case below only uses the
    //  ones that apply to it.
    const uint32 usage = (token & 0xF);
    const uint32 index = ((token >> 16) & 0xF);
    const uint32 ttype = ((token >> 27) & 0xF);

    uint32 reserved_mask = 0x00000000;
    int supported = 1;

    if (((token >> 31) & 0x1) != 0x1)
        fail(ctx, "Bit #31 in DCL token must be one");

    ctx->centroid_allowed = 1;
    adjust_token_position(ctx, 1);
    parse_destination_token(ctx, &ctx->dest_arg);
    ctx->centroid_allowed = 0;

    const DestArgInfo *decl = &ctx->dest_arg;

    if (decl->result_shift != 0)  // I'm pretty sure this is illegal...?
        fail(ctx, "shift scale in DCL");
    if (decl->relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DCL");

    const RegisterType regtype = decl->regtype;
    const int regnum = decl->regnum;

    if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if

        else if (regtype == REG_TYPE_MISCTYPE)
        {
            switch ((MiscTypeType) regnum)
            {
                case MISCTYPE_TYPE_POSITION:
                    reserved_mask = 0x7FFFFFFF;
                    break;

                case MISCTYPE_TYPE_FACE:
                    reserved_mask = 0x7FFFFFFF;
                    if (!writemask_xyzw(decl->orig_writemask))
                        fail(ctx, "DCL face writemask must be full");
                    if (decl->result_mod != 0)
                        fail(ctx, "DCL face result modifier must be zero");
                    if (decl->result_shift != 0)
                        fail(ctx, "DCL face shift scale must be zero");
                    break;

                default:
                    supported = 0;
                    break;
            } // switch

            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_UNKNOWN;
            ctx->dwords[1] = 0;
        } // else if

        else if (regtype == REG_TYPE_TEXTURE)
        {
            if (usage == MOJOSHADER_USAGE_TEXCOORD)
            {
                if (index > 7)
                    fail(ctx, "DCL texcoord usage must have 0-7 index");
            } // if
            else if (usage == MOJOSHADER_USAGE_COLOR)
            {
                if (index != 0)
                    fail(ctx, "DCL color usage must have 0 index");
            } // else if
            else
            {
                fail(ctx, "Invalid DCL texture usage");
            } // else

            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // else if

        else if (regtype == REG_TYPE_SAMPLER)
        {
            if (!valid_texture_type(ttype))
                fail(ctx, "unknown sampler texture type");
            reserved_mask = 0x7FFFFFF;
            ctx->dwords[0] = ttype;
        } // else if

        else
        {
            supported = 0;
        } // else
    } // if

    else if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
    {
        // Here usage is implied by the register type and the index is
        //  the register number; the declaration dword carries no fields.
        if (regtype == REG_TYPE_INPUT)
        {
            reserved_mask = 0x7FFFFFFF;
            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_COLOR;
            ctx->dwords[1] = regnum;
        } // if
        else if (regtype == REG_TYPE_TEXTURE)
        {
            reserved_mask = 0x7FFFFFFF;
            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_TEXCOORD;
            ctx->dwords[1] = regnum;
        } // else if
        else if (regtype == REG_TYPE_SAMPLER)
        {
            if (!valid_texture_type(ttype))
                fail(ctx, "unknown sampler texture type");
            reserved_mask = 0x7FFFFFF;
            ctx->dwords[0] = ttype;
        } // else if
        else
        {
            supported = 0;
        } // else
    } // else if

    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
    {
        if ((regtype == REG_TYPE_INPUT) || (regtype == REG_TYPE_OUTPUT))
        {
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if
        else
        {
            supported = 0;
        } // else
    } // else if

    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 1, 1)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if
        else
        {
            supported = 0;
        } // else
    } // else if

    else
    {
        supported = 0;
    } // else

    if (!supported)
        fail(ctx, "invalid DCL register type for this shader model");

    if ((token & reserved_mask) != 0)
        fail(ctx, "reserved bits in DCL dword aren't zero");

    return 3;
} // parse_args_DCL
6882
6883
// Parse one destination token into ctx->dest_arg.
// Returns 1 plus whatever parse_destination_token() reports.
static int parse_args_D(Context *ctx)
{
    return 1 + parse_destination_token(ctx, &ctx->dest_arg);
} // parse_args_D
6890
6891
// Parse one source token into ctx->source_args[0].
// Returns 1 plus whatever parse_source_token() reports.
static int parse_args_S(Context *ctx)
{
    return 1 + parse_source_token(ctx, &ctx->source_args[0]);
} // parse_args_S
6898
6899
// Parse two source tokens, in order, into ctx->source_args[0..1].
// Returns 1 plus the sum of what each parse_source_token() call reports.
static int parse_args_SS(Context *ctx)
{
    int total = 1;
    int i;

    for (i = 0; i < 2; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_SS
6907
6908
// Parse a destination token into ctx->dest_arg, then one source token
// into ctx->source_args[0]. Returns 1 plus the sum of what the two
// token parsers report.
static int parse_args_DS(Context *ctx)
{
    const int dst_count = parse_destination_token(ctx, &ctx->dest_arg);
    const int src_count = parse_source_token(ctx, &ctx->source_args[0]);
    return 1 + dst_count + src_count;
} // parse_args_DS
6916
6917
// Parse a destination token into ctx->dest_arg, then two source tokens,
// in order, into ctx->source_args[0..1]. Returns 1 plus the sum of what
// the token parsers report.
static int parse_args_DSS(Context *ctx)
{
    int total = 1 + parse_destination_token(ctx, &ctx->dest_arg);
    int i;

    for (i = 0; i < 2; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_DSS
6926
6927
// Parse a destination token into ctx->dest_arg, then three source tokens,
// in order, into ctx->source_args[0..2]. Returns 1 plus the sum of what
// the token parsers report.
static int parse_args_DSSS(Context *ctx)
{
    int total = 1 + parse_destination_token(ctx, &ctx->dest_arg);
    int i;

    for (i = 0; i < 3; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_DSSS
6937
6938
// Parse a destination token into ctx->dest_arg, then four source tokens,
// in order, into ctx->source_args[0..3]. Returns 1 plus the sum of what
// the token parsers report.
static int parse_args_DSSSS(Context *ctx)
{
    int total = 1 + parse_destination_token(ctx, &ctx->dest_arg);
    int i;

    for (i = 0; i < 4; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_DSSSS
6949
6950
// SINCOS takes a destination and one source from Shader Model 3.0 on;
// earlier models supply two extra source registers.
static int parse_args_SINCOS(Context *ctx)
{
    return shader_version_atleast(ctx, 3, 0) ? parse_args_DS(ctx)
                                             : parse_args_DSSS(ctx);
} // parse_args_SINCOS
6958
6959
// TEXCRD is destination-only before version 1.4; from 1.4 on it also
// takes a source register.
static int parse_args_TEXCRD(Context *ctx)
{
    return shader_version_atleast(ctx, 1, 4) ? parse_args_DS(ctx)
                                             : parse_args_D(ctx);
} // parse_args_TEXCRD
6967
6968
// TEXLD's argument list depends on the shader version: destination only
// below 1.4, destination plus one source from 1.4, and destination plus
// two sources from 2.0.
static int parse_args_TEXLD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 2, 0))
    {
        if (!shader_version_atleast(ctx, 1, 4))
            return parse_args_D(ctx);
        return parse_args_DS(ctx);
    } // if

    return parse_args_DSS(ctx);
} // parse_args_TEXLD
6978
6979
6980 // State machine functions...
6981
alloc_constant_listitem(Context * ctx)6982 static ConstantsList *alloc_constant_listitem(Context *ctx)
6983 {
6984 ConstantsList *item = (ConstantsList *) Malloc(ctx, sizeof (ConstantsList));
6985 if (item == NULL)
6986 return NULL;
6987
6988 memset(&item->constant, '\0', sizeof (MOJOSHADER_constant));
6989 item->next = ctx->constants;
6990 ctx->constants = item;
6991 ctx->constant_count++;
6992
6993 return item;
6994 } // alloc_constant_listitem
6995
6996
// State check for DEF: must appear before any instruction and target a
// CONST register. On success, records a float constant (four values taken
// from ctx->dwords) for the register and marks the register as defined.
// If the list node can't be allocated, nothing is recorded.
static void state_DEF(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;
    ConstantsList *item;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DEF token must come before any instructions");
        return;
    } // if

    if (regtype != REG_TYPE_CONST)
    {
        fail(ctx, "DEF token using invalid register");
        return;
    } // if

    item = alloc_constant_listitem(ctx);
    if (item == NULL)
        return;

    item->constant.index = regnum;
    item->constant.type = MOJOSHADER_UNIFORM_FLOAT;
    memcpy(item->constant.value.f, ctx->dwords,
           sizeof (item->constant.value.f));
    set_defined_register(ctx, regtype, regnum);
} // state_DEF
7021
// State check for DEFI: must appear before any instruction and target a
// CONSTINT register. On success, records an integer constant (values taken
// from ctx->dwords) for the register and marks the register as defined.
// If the list node can't be allocated, nothing is recorded.
static void state_DEFI(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;
    ConstantsList *item;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DEFI token must come before any instructions");
        return;
    } // if

    if (regtype != REG_TYPE_CONSTINT)
    {
        fail(ctx, "DEFI token using invalid register");
        return;
    } // if

    item = alloc_constant_listitem(ctx);
    if (item == NULL)
        return;

    item->constant.index = regnum;
    item->constant.type = MOJOSHADER_UNIFORM_INT;
    memcpy(item->constant.value.i, ctx->dwords,
           sizeof (item->constant.value.i));
    set_defined_register(ctx, regtype, regnum);
} // state_DEFI
7047
// State check for DEFB: must appear before any instruction and target a
// CONSTBOOL register. On success, records a boolean constant (0 or 1,
// derived from ctx->dwords[0]) and marks the register as defined.
// If the list node can't be allocated, nothing is recorded.
static void state_DEFB(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;
    ConstantsList *item;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DEFB token must come before any instructions");
        return;
    } // if

    if (regtype != REG_TYPE_CONSTBOOL)
    {
        fail(ctx, "DEFB token using invalid register");
        return;
    } // if

    item = alloc_constant_listitem(ctx);
    if (item == NULL)
        return;

    item->constant.index = regnum;
    item->constant.type = MOJOSHADER_UNIFORM_BOOL;
    item->constant.value.b = ctx->dwords[0] ? 1 : 0;
    set_defined_register(ctx, regtype, regnum);
} // state_DEFB
7071
// State handling for DCL, using what parse_args_DCL() left in
// ctx->dest_arg and ctx->dwords (it does most of the validation).
//
// Vertex shaders register an attribute with usage dwords[0] and index
// dwords[1]; an out-of-range usage fails and returns without marking the
// register defined. Pixel shaders register either a sampler (texture type
// in dwords[0]) or an attribute. A DCL that follows an instruction fails,
// but the register is still marked as defined.
//
// NOTE(review): the pixel path doesn't range-check usage the way the
// vertex path does - confirm whether that's intentional.
static void state_DCL(Context *ctx)
{
    const DestArgInfo *decl = &ctx->dest_arg;
    const RegisterType regtype = decl->regtype;
    const int regnum = decl->regnum;
    const int wmask = decl->writemask;
    const int mods = decl->result_mod;

    // !!! FIXME: apparently vs_3_0 can use sampler registers now.
    // !!! FIXME:  (but only s0 through s3, not all 16 of them.)

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DCL token must come before any instructions");
    } // if

    else if (shader_is_vertex(ctx))
    {
        const MOJOSHADER_usage usage = (const MOJOSHADER_usage) ctx->dwords[0];
        const int index = ctx->dwords[1];
        if (usage >= MOJOSHADER_USAGE_TOTAL)
        {
            fail(ctx, "unknown DCL usage");
            return;
        } // if
        add_attribute_register(ctx, regtype, regnum, usage, index, wmask, mods);
    } // else if

    else if (!shader_is_pixel(ctx))
    {
        fail(ctx, "unsupported shader type."); // should be caught elsewhere.
        return;
    } // else if

    else if (regtype != REG_TYPE_SAMPLER)
    {
        const MOJOSHADER_usage usage = (MOJOSHADER_usage) ctx->dwords[0];
        const int index = ctx->dwords[1];
        add_attribute_register(ctx, regtype, regnum, usage, index, wmask, mods);
    } // else if

    else
    {
        add_sampler(ctx, regnum, (TextureType) ctx->dwords[0], 0);
    } // else

    set_defined_register(ctx, regtype, regnum);
} // state_DCL
7120
// TEXCRD is rejected from Shader Model 2.0 on (apparently removed).
static void state_TEXCRD(Context *ctx)
{
    const int removed = shader_version_atleast(ctx, 2, 0);
    if (removed)
        fail(ctx, "TEXCRD in Shader Model >= 2.0");
} // state_TEXCRD
7126
// State check for FRC: the destination may not use the saturate modifier
// (according to msdn), and below Shader Model 2.0 the writemask has to be
// .y or .xy. Only the first violated rule is reported.
static void state_FRC(Context *ctx)
{
    const int result_mod = ctx->dest_arg.result_mod;
    const int mask = ctx->dest_arg.writemask;

    if (result_mod & MOD_SATURATE)
    {
        fail(ctx, "FRC destination can't use saturate modifier");
        return;
    } // if

    if (shader_version_atleast(ctx, 2, 0))
        return;  // no writemask restriction here.

    if (!(writemask_y(mask) || writemask_xy(mask)))
        fail(ctx, "FRC writemask must be .y or .xy for shader model 1.x");
} // state_FRC
7140
7141
7142 // replicate the matrix registers to source args. The D3D profile will
7143 // only use the one legitimate argument, but this saves other profiles
7144 // from having to build this.
srcarg_matrix_replicate(Context * ctx,const int idx,const int rows)7145 static void srcarg_matrix_replicate(Context *ctx, const int idx,
7146 const int rows)
7147 {
7148 int i;
7149 SourceArgInfo *src = &ctx->source_args[idx];
7150 SourceArgInfo *dst = &ctx->source_args[idx+1];
7151 for (i = 0; i < (rows-1); i++, dst++)
7152 {
7153 memcpy(dst, src, sizeof (SourceArgInfo));
7154 dst->regnum += (i + 1);
7155 set_used_register(ctx, dst->regtype, dst->regnum, 0);
7156 } // for
7157 } // srcarg_matrix_replicate
7158
// State check for M4X4: the destination writemask must be full; the matrix
// source (source arg 1) is then expanded to 4 row registers.
static void state_M4X4(Context *ctx)
{
    if (!writemask_xyzw(ctx->dest_arg.writemask))
        fail(ctx, "M4X4 writemask must be full");

    // !!! FIXME: MSDN:
    // The xyzw (default) mask is required for the destination register.
    //  Negate and swizzle modifiers are allowed for src0, but not for src1.
    // Swizzle and negate modifiers are invalid for the src0 register. The
    //  dest and src0 registers cannot be the same.

    srcarg_matrix_replicate(ctx, 1, 4);
} // state_M4X4
7171
// State check for M4X3: the destination writemask must be .xyz; the matrix
// source (source arg 1) is then expanded to 3 row registers.
static void state_M4X3(Context *ctx)
{
    if (!writemask_xyz(ctx->dest_arg.writemask))
        fail(ctx, "M4X3 writemask must be .xyz");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 3);
} // state_M4X3
7182
// State check for M3X4: the destination writemask must be .xyzw; the
// matrix source (source arg 1) is then expanded to 4 row registers.
static void state_M3X4(Context *ctx)
{
    if (!writemask_xyzw(ctx->dest_arg.writemask))
        fail(ctx, "M3X4 writemask must be .xyzw");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 4);
} // state_M3X4
7193
// State check for M3X3: the destination writemask must be .xyz; the matrix
// source (source arg 1) is then expanded to 3 row registers.
static void state_M3X3(Context *ctx)
{
    if (!writemask_xyz(ctx->dest_arg.writemask))
        fail(ctx, "M3X3 writemask must be .xyz");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 3);
} // state_M3X3
7204
// State check for M3X2: the destination writemask must be .xy; the matrix
// source (source arg 1) is then expanded to 2 row registers.
static void state_M3X2(Context *ctx)
{
    if (!writemask_xy(ctx->dest_arg.writemask))
        fail(ctx, "M3X2 writemask must be .xy");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 2);
} // state_M3X2
7215
// State check for RET: no LOOP or REP block may still be open.
//
// MSDN all but says that assembly shaders are more or less serialized HLSL
//  functions, and a RET marks the end of one (unlike how most CPUs would
//  behave). That lets us use high-level constructs instead of a mess of
//  GOTOs, which is a godsend for GLSL, and it means an unterminated LOOP
//  or REP inside a label's section can be treated as an error.
static void state_RET(Context *ctx)
{
    const int open_loops = ctx->loops;
    const int open_reps = ctx->reps;

    if (open_loops > 0)
        fail(ctx, "LOOP without ENDLOOP");

    if (open_reps > 0)
        fail(ctx, "REP without ENDREP");
} // state_RET
7230
// Validate that source argument #arg of the current instruction is a usable
// label register. `opcode` is only used to build error messages.
//
// Checks: the register must be a label register, the shader must be at
// least version 2.0, and the register number must be within the limit for
// the shader version: 0-2047 for version 2.255 and later, 0-15 otherwise.
//
// The two limits are mutually exclusive. Previously the "<= 15" test ran
// for every version, which made the 2047 limit unreachable and rejected
// labels 16..2047 on versions that allow them.
static void check_label_register(Context *ctx, int arg, const char *opcode)
{
    const SourceArgInfo *info = &ctx->source_args[arg];
    const RegisterType regtype = info->regtype;
    const int regnum = info->regnum;

    if (regtype != REG_TYPE_LABEL)
        failf(ctx, "%s with a non-label register specified", opcode);
    if (!shader_version_atleast(ctx, 2, 0))
        failf(ctx, "%s not supported in Shader Model 1", opcode);

    if (shader_version_atleast(ctx, 2, 255))
    {
        if (regnum > 2047)
            fail(ctx, "label register number must be <= 2047");
    } // if
    else if (regnum > 15)
    {
        fail(ctx, "label register number must be <= 15");
    } // else if
} // check_label_register
7246
// State check for LABEL: the instruction right before a LABEL must be a
// RET, and source arg 0 must be a valid label register. The label register
// is then marked as defined.
static void state_LABEL(Context *ctx)
{
    // The test looks at the *previous* opcode, so the message says
    //  "preceded" (it used to say "followed", which is the reverse).
    if (ctx->previous_opcode != OPCODE_RET)
        fail(ctx, "LABEL not preceded by a RET");
    check_label_register(ctx, 0, "LABEL");
    set_defined_register(ctx, REG_TYPE_LABEL, ctx->source_args[0].regnum);
} // state_LABEL
7254
// Enforce consistent loop wrapping of calls to label #regnum.
//
// msdn says subroutines inherit the aL register if you're in a loop when
//  you call, and furthermore _if you ever call this function in a loop,
//  it must always be called in a loop_. So the first call records, in the
//  label register's `misc` field, whether it happened inside a LOOP
//  (1) or outside of one (-1); every later call has to match.
//
// The label is looked up in ctx->used_registers. That lookup can come back
//  empty on malformed input (the CALL argument wasn't a label register, or
//  an earlier failure kept the source-token parser from flagging the
//  register as used), so it is handled gracefully rather than being
//  asserted on and dereferenced.
static void check_call_loop_wrappage(Context *ctx, const int regnum)
{
    const int current_usage = (ctx->loops > 0) ? 1 : -1;
    RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, regnum);

    if (reg == NULL)
    {
        // If parsing has already failed, that failure is the real error;
        //  only report here when nothing else explains the missing entry.
        if (!isfail(ctx))
            fail(ctx, "CALL to a label register that isn't flagged as used");
        return;
    } // if

    if (reg->misc == 0)
        reg->misc = current_usage;
    else if (reg->misc != current_usage)
    {
        if (current_usage == 1)
            fail(ctx, "CALL to this label must be wrapped in LOOP/ENDLOOP");
        else
            fail(ctx, "CALL to this label must not be wrapped in LOOP/ENDLOOP");
    } // else if
} // check_call_loop_wrappage
7276
// State check for CALL: source arg 0 must be a valid label register, and
// the call must agree with earlier calls to that label about being inside
// a LOOP or not.
static void state_CALL(Context *ctx)
{
    const int label = ctx->source_args[0].regnum;

    check_label_register(ctx, 0, "CALL");
    check_call_loop_wrappage(ctx, label);
} // state_CALL
7282
// State check for CALLNZ: source arg 1 must be a constbool or predicate
// register, source arg 0 must be a valid label register, and the call must
// agree with earlier calls to that label about being inside a LOOP or not.
static void state_CALLNZ(Context *ctx)
{
    const RegisterType condtype = ctx->source_args[1].regtype;
    const int is_bool = (condtype == REG_TYPE_CONSTBOOL);
    const int is_pred = (condtype == REG_TYPE_PREDICATE);

    if (!(is_bool || is_pred))
        fail(ctx, "CALLNZ argument isn't constbool or predicate register");

    check_label_register(ctx, 0, "CALLNZ");
    check_call_loop_wrappage(ctx, ctx->source_args[0].regnum);
} // state_CALLNZ
7291
// State check for MOVA: the destination has to be the address register.
static void state_MOVA(Context *ctx)
{
    const RegisterType dsttype = ctx->dest_arg.regtype;
    if (dsttype != REG_TYPE_ADDRESS)
        fail(ctx, "MOVA argument isn't address register");
} // state_MOVA
7297
// State check for RCP: source arg 0 needs a replicate swizzle.
static void state_RCP(Context *ctx)
{
    const int swiz = ctx->source_args[0].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "RCP without replicate swizzzle");
} // state_RCP
7303
// State check for LOOP: source arg 0 must be the loop register and source
// arg 1 a constint register. Only a valid LOOP increases the nesting
// count in ctx->loops.
static void state_LOOP(Context *ctx)
{
    if (ctx->source_args[0].regtype != REG_TYPE_LOOP)
    {
        fail(ctx, "LOOP argument isn't loop register");
        return;
    } // if

    if (ctx->source_args[1].regtype != REG_TYPE_CONSTINT)
    {
        fail(ctx, "LOOP argument isn't constint register");
        return;
    } // if

    ctx->loops++;
} // state_LOOP
7313
// State check for ENDLOOP: closes the innermost open LOOP by decreasing
// ctx->loops. An ENDLOOP with no open LOOP is an error and leaves the
// counter alone, so it never goes negative; a negative count would make
// later "ctx->loops == 0" tests (BREAK, BREAKP, BREAKC) believe they are
// inside a loop.
static void state_ENDLOOP(Context *ctx)
{
    // !!! FIXME: check that we aren't straddling an IF block.
    if (ctx->loops <= 0)
        fail(ctx, "ENDLOOP without LOOP");
    else
        ctx->loops--;
} // state_ENDLOOP
7321
// State check for BREAKP: source arg 0 must be a predicate register with a
// replicate swizzle, and the instruction must sit inside a LOOP or REP
// block. Only the first violated rule is reported.
static void state_BREAKP(Context *ctx)
{
    const SourceArgInfo *pred = &ctx->source_args[0];

    if (pred->regtype != REG_TYPE_PREDICATE)
    {
        fail(ctx, "BREAKP argument isn't predicate register");
        return;
    } // if

    if (!replicate_swizzle(pred->swizzle))
    {
        fail(ctx, "BREAKP without replicate swizzzle");
        return;
    } // if

    if ((ctx->loops == 0) && (ctx->reps == 0))
        fail(ctx, "BREAKP outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAKP
7332
// State check for BREAK: only legal while a LOOP or REP block is open.
static void state_BREAK(Context *ctx)
{
    const int in_block = !((ctx->loops == 0) && (ctx->reps == 0));
    if (!in_block)
        fail(ctx, "BREAK outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAK
7338
// State check for SETP: the destination has to be a predicate register.
static void state_SETP(Context *ctx)
{
    if (ctx->dest_arg.regtype != REG_TYPE_PREDICATE)
        fail(ctx, "SETP argument isn't predicate register");
} // state_SETP
7345
// State check for REP: source arg 0 must be a constint register. The REP
// nesting depth in ctx->reps is increased either way, and ctx->max_reps
// tracks the deepest nesting seen so far.
static void state_REP(Context *ctx)
{
    if (ctx->source_args[0].regtype != REG_TYPE_CONSTINT)
        fail(ctx, "REP argument isn't constint register");

    ctx->reps++;

    if (ctx->max_reps < ctx->reps)
        ctx->max_reps = ctx->reps;
} // state_REP
7356
// State check for ENDREP: closes the innermost open REP by decreasing
// ctx->reps. An ENDREP with no open REP is an error and leaves the counter
// alone, so it never goes negative; a negative count would make later
// "ctx->reps == 0" tests (BREAK, BREAKP, BREAKC) believe they are inside
// a REP block.
static void state_ENDREP(Context *ctx)
{
    // !!! FIXME: check that we aren't straddling an IF block.
    if (ctx->reps <= 0)
        fail(ctx, "ENDREP without REP");
    else
        ctx->reps--;
} // state_ENDREP
7364
// State handling for CMP. Every CMP is counted in ctx->cmps. For shader
// versions below 1.4 there are extra rules: at most 3 CMP instructions,
// the destination register may not also be one of the three sources, and
// the instruction is charged one extra instruction slot.
static void state_CMP(Context *ctx)
{
    int srcidx;

    ctx->cmps++;

    if (shader_version_atleast(ctx, 1, 4))
        return;  // none of the restrictions below apply.

    if (ctx->cmps > 3)
        fail(ctx, "only 3 CMP instructions allowed in this shader model");

    for (srcidx = 0; srcidx < 3; srcidx++)
    {
        const SourceArgInfo *src = &ctx->source_args[srcidx];
        const int same_type = (ctx->dest_arg.regtype == src->regtype);
        const int same_num = (ctx->dest_arg.regnum == src->regnum);
        if (same_type && same_num)
            fail(ctx, "CMP dest can't match sources in this shader model");
    } // for

    ctx->instruction_count++;  // takes an extra slot in ps_1_2 and _3.
} // state_CMP
7392
// State handling for DP4: below shader version 1.4 the instruction is
// charged one extra instruction slot (ps_1_2 and _3).
static void state_DP4(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
        return;

    ctx->instruction_count++;
} // state_DP4
7399
// State check for CND. It is rejected from Shader Model 2.0 on (apparently
// removed; it's not in the docs past ps_1_4). Below version 1.4, source
// arg 0 has to be r0.a: temp register 0 with swizzle 0xFF.
static void state_CND(Context *ctx)
{
    const SourceArgInfo *src0 = &ctx->source_args[0];

    if (shader_version_atleast(ctx, 2, 0))
    {
        fail(ctx, "CND not allowed in this shader model");
        return;
    } // if

    if (shader_version_atleast(ctx, 1, 4))
        return;  // no further restrictions.

    if (!((src0->regtype == REG_TYPE_TEMP) && (src0->regnum == 0) &&
          (src0->swizzle == 0xFF)))
    {
        fail(ctx, "CND src must be r0.a in this shader model");
    } // if
} // state_CND
7417
// State check for POW: both source args need a replicate swizzle. Only
// the first offender is reported.
static void state_POW(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "POW src0 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "POW src1 must have replicate swizzle");
} // state_POW
7425
// State check for LOG: source arg 0 needs a replicate swizzle.
static void state_LOG(Context *ctx)
{
    const int swiz = ctx->source_args[0].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "LOG src0 must have replicate swizzle");
} // state_LOG
7431
// State check for LOGP: source arg 0 needs a replicate swizzle.
static void state_LOGP(Context *ctx)
{
    const int swiz = ctx->source_args[0].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "LOGP src0 must have replicate swizzle");
} // state_LOGP
7437
// State check for SINCOS. In order, stopping at the first violation:
//  - the writemask must be .x, .y or .xy;
//  - source arg 0 needs a replicate swizzle;
//  - the destination may not use the saturate modifier (according to msdn);
//  - below Shader Model 3.0, where the opcode carries two extra source
//    registers, both must be constfloat registers and they must differ.
static void state_SINCOS(Context *ctx)
{
    const DestArgInfo *dst = &ctx->dest_arg;
    const int mask = dst->writemask;
    int srcidx;

    if (!writemask_x(mask) && !writemask_y(mask) && !writemask_xy(mask))
    {
        fail(ctx, "SINCOS write mask must be .x or .y or .xy");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "SINCOS src0 must have replicate swizzle");
        return;
    } // if

    if (dst->result_mod & MOD_SATURATE)
    {
        fail(ctx, "SINCOS destination can't use saturate modifier");
        return;
    } // if

    if (shader_version_atleast(ctx, 3, 0))
        return;  // no extra registers to validate.

    for (srcidx = 1; srcidx < 3; srcidx++)
    {
        if (ctx->source_args[srcidx].regtype != REG_TYPE_CONST)
        {
            failf(ctx, "SINCOS src%d must be constfloat", srcidx);
            return;
        } // if
    } // for

    if (ctx->source_args[1].regnum == ctx->source_args[2].regnum)
        fail(ctx, "SINCOS src1 and src2 must be different registers");
} // state_SINCOS
7468
// State check for IF: source arg 0 must be a constbool or predicate
// register and needs a replicate swizzle. Only the first violation is
// reported.
static void state_IF(Context *ctx)
{
    const SourceArgInfo *cond = &ctx->source_args[0];
    const int type_ok = ((cond->regtype == REG_TYPE_PREDICATE) ||
                         (cond->regtype == REG_TYPE_CONSTBOOL));

    if (!type_ok)
    {
        fail(ctx, "IF src0 must be CONSTBOOL or PREDICATE");
        return;
    } // if

    if (!replicate_swizzle(cond->swizzle))
        fail(ctx, "IF src0 must have replicate swizzle");

    // !!! FIXME: track if nesting depth.
} // state_IF
7478
// State check for IFC: both source args need a replicate swizzle. Only
// the first offender is reported.
static void state_IFC(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "IFC src0 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "IFC src1 must have replicate swizzle");

    // !!! FIXME: track if nesting depth.
} // state_IFC
7487
// State check for BREAKC: both source args need a replicate swizzle, and
// the instruction must sit inside a LOOP or REP block. Only the first
// violated rule is reported.
//
// Operands are named src0/src1 in the messages, zero-based like the other
// checks in this file (IFC, POW, SINCOS); they used to be reported as
// src1/src2, which pointed at the wrong operand.
static void state_BREAKC(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
        fail(ctx, "BREAKC src0 must have replicate swizzle");
    else if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "BREAKC src1 must have replicate swizzle");
    else if ((ctx->loops == 0) && (ctx->reps == 0))
        fail(ctx, "BREAKC outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAKC
7497
// Validate a TEXKILL instruction: the operand needs a full .xyzw writemask
// and must be a temp or texture register. Only the first problem found is
// reported (through fail()).
static void state_TEXKILL(Context *ctx)
{
    // MSDN describes the operand as a source argument while the driver docs
    // call it a destination (annoyingly); it is read from the destination
    // slot here.
    const DestArgInfo *arg = &ctx->dest_arg;

    if (!writemask_xyzw(arg->writemask))
    {
        fail(ctx, "TEXKILL writemask must be .xyzw");
        return;
    } // if

    const int is_temp = (arg->regtype == REG_TYPE_TEMP);
    const int is_texture = (arg->regtype == REG_TYPE_TEXTURE);
    if (!is_temp && !is_texture)
        fail(ctx, "TEXKILL must use a temp or texture register");

    // !!! FIXME: "If a temporary register is used, all components must have been previously written."
    // !!! FIXME: "If a texture register is used, all components that are read must have been declared."
    // !!! FIXME: there are further limitations in ps_1_3 and earlier.
} // state_TEXKILL
7513
7514 // Some rules that apply to some of the fruity ps_1_1 texture opcodes...
state_texops(Context * ctx,const char * opcode,const int dims,const int texbem)7515 static void state_texops(Context *ctx, const char *opcode,
7516 const int dims, const int texbem)
7517 {
7518 const DestArgInfo *dst = &ctx->dest_arg;
7519 const SourceArgInfo *src = &ctx->source_args[0];
7520 if (dst->regtype != REG_TYPE_TEXTURE)
7521 failf(ctx, "%s destination must be a texture register", opcode);
7522 if (src->regtype != REG_TYPE_TEXTURE)
7523 failf(ctx, "%s source must be a texture register", opcode);
7524 if (src->regnum >= dst->regnum) // so says MSDN.
7525 failf(ctx, "%s dest must be a higher register than source", opcode);
7526
7527 if (dims)
7528 {
7529 TextureType ttyp = (dims == 2) ? TEXTURE_TYPE_2D : TEXTURE_TYPE_CUBE;
7530 add_sampler(ctx, dst->regnum, ttyp, texbem);
7531 } // if
7532
7533 add_attribute_register(ctx, REG_TYPE_TEXTURE, dst->regnum,
7534 MOJOSHADER_USAGE_TEXCOORD, dst->regnum, 0xF, 0);
7535
7536 // Strictly speaking, there should be a TEX opcode prior to this call that
7537 // should fill in this metadata, but I'm not sure that's required for the
7538 // shader to assemble in D3D, so we'll do this so we don't fail with a
7539 // cryptic error message even if the developer didn't do the TEX.
7540 add_attribute_register(ctx, REG_TYPE_TEXTURE, src->regnum,
7541 MOJOSHADER_USAGE_TEXCOORD, src->regnum, 0xF, 0);
7542 } // state_texops
7543
// Shared validation for TEXBEM and TEXBEML; "opcode" is the name used in
// error messages. After the version-specific checks, the common texture-op
// rules run with a 2D sampler flagged as texbem.
static void state_texbem(Context *ctx, const char *opcode)
{
    // The TEXBEM equation, according to MSDN:
    //u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R
    //            + D3DTSS_BUMPENVMAT10(stage m)*t(n)G
    //v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R
    //            + D3DTSS_BUMPENVMAT11(stage m)*t(n)G
    //t(m)RGBA = TextureSample(stage m)
    //
    // ...TEXBEML adds this at the end:
    //t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) +
    //           D3DTSS_BUMPENVLOFFSET(stage m)]

    if (shader_version_atleast(ctx, 1, 4))
    {
        failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);
    } // if

    // The SRCMOD_SIGN (_bx2) source modifier is only rejected before ps_1_2.
    if ( (!shader_version_atleast(ctx, 1, 2)) &&
         (ctx->source_args[0].src_mod == SRCMOD_SIGN) )
    {
        failf(ctx, "%s forbids _bx2 on source reg before ps_1_2", opcode);
    } // if

    // !!! FIXME: MSDN:
    // !!! FIXME:  Register data that has been read by a texbem
    // !!! FIXME:  or texbeml instruction cannot be read later,
    // !!! FIXME:  except by another texbem or texbeml.

    state_texops(ctx, opcode, 2, 1);
} // state_texbem
7573
// State handler for TEXBEM: runs the shared texbem checks, reporting any
// errors under the name "TEXBEM".
static void state_TEXBEM(Context *ctx)
{
    const char *opname = "TEXBEM";
    state_texbem(ctx, opname);
} // state_TEXBEM
7578
// State handler for TEXBEML: runs the shared texbem checks, reporting any
// errors under the name "TEXBEML".
static void state_TEXBEML(Context *ctx)
{
    const char *opname = "TEXBEML";
    state_texbem(ctx, opname);
} // state_TEXBEML
7583
// State handler for TEXM3X2PAD: rejects the opcode from Shader Model 1.4 on,
// applies the shared texture-op rules (no sampler is registered), then
// remembers this instruction's source and destination register numbers in
// the context.
static void state_TEXM3X2PAD(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
    {
        fail(ctx, "TEXM3X2PAD opcode not available after Shader Model 1.3");
    } // if

    state_texops(ctx, "TEXM3X2PAD", 0, 0);

    // !!! FIXME: check for correct opcode existence and order more rigorously?
    ctx->texm3x2pad_dst0 = ctx->dest_arg.regnum;
    ctx->texm3x2pad_src0 = ctx->source_args[0].regnum;
} // state_TEXM3X2PAD
7593
// State handler for TEXM3X2TEX: requires Shader Model <= 1.3 and a recorded
// TEXM3X2PAD destination, applies the shared texture-op rules with a 2D
// sampler, flags the recorded pad registers for reset, and finally checks
// that the sampler registered for the destination really is 2D.
static void state_TEXM3X2TEX(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
    {
        fail(ctx, "TEXM3X2TEX opcode not available after Shader Model 1.3");
    } // if

    if (ctx->texm3x2pad_dst0 == -1)
    {
        fail(ctx, "TEXM3X2TEX opcode without matching TEXM3X2PAD");
    } // if

    // !!! FIXME: check for correct opcode existence and order more rigorously?
    state_texops(ctx, "TEXM3X2TEX", 2, 0);
    ctx->reset_texmpad = 1;

    // The sampler entry's "index" field is read as its texture type; a
    //  missing entry is treated as type 0.
    const RegisterList *sampler = reglist_find(&ctx->samplers,
                                               REG_TYPE_SAMPLER,
                                               ctx->dest_arg.regnum);
    TextureType ttype = (TextureType) 0;
    if (sampler != NULL)
        ttype = (TextureType) sampler->index;

    // A samplermap might change this to something nonsensical.
    if (ttype != TEXTURE_TYPE_2D)
        fail(ctx, "TEXM3X2TEX needs a 2D sampler");
} // state_TEXM3X2TEX
7612
// State handler for TEXM3X3PAD: rejects the opcode from Shader Model 1.4 on,
// applies the shared texture-op rules (no sampler is registered), then
// records this instruction's source/destination register numbers. The first
// pad fills the "0" slots and the second fills the "1" slots; a further pad
// while both slots are still set is not recorded.
static void state_TEXM3X3PAD(Context *ctx)
{
    // The message has to name this opcode (it previously said TEXM3X2TEX).
    if (shader_version_atleast(ctx, 1, 4))
        fail(ctx, "TEXM3X3PAD opcode not available after Shader Model 1.3");
    state_texops(ctx, "TEXM3X3PAD", 0, 0);

    // !!! FIXME: check for correct opcode existence and order more rigorously?
    if (ctx->texm3x3pad_dst0 == -1)
    {
        ctx->texm3x3pad_src0 = ctx->source_args[0].regnum;
        ctx->texm3x3pad_dst0 = ctx->dest_arg.regnum;
    } // if
    else if (ctx->texm3x3pad_dst1 == -1)
    {
        ctx->texm3x3pad_src1 = ctx->source_args[0].regnum;
        ctx->texm3x3pad_dst1 = ctx->dest_arg.regnum;
    } // else
} // state_TEXM3X3PAD
7631
// Shared validation for the opcodes that finish a TEXM3X3PAD sequence.
//  opcode: name used in the error messages.
//  dims:   forwarded to state_texops() to pick which sampler (if any) gets
//          registered for the destination.
// Also flags the recorded pad registers for reset and checks the type of the
// sampler registered for the destination register.
static void state_texm3x3(Context *ctx, const char *opcode, const int dims)
{
    // !!! FIXME: check for correct opcode existence and order more rigorously?
    if (shader_version_atleast(ctx, 1, 4))
    {
        failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);
    } // if

    if (ctx->texm3x3pad_dst1 == -1)
    {
        failf(ctx, "%s opcode without matching TEXM3X3PADs", opcode);
    } // if

    state_texops(ctx, opcode, dims, 0);
    ctx->reset_texmpad = 1;

    // The sampler entry's "index" field is read as its texture type; a
    //  missing entry is treated as type 0.
    const RegisterList *sampler = reglist_find(&ctx->samplers,
                                               REG_TYPE_SAMPLER,
                                               ctx->dest_arg.regnum);
    TextureType ttype = (TextureType) 0;
    if (sampler != NULL)
        ttype = (TextureType) sampler->index;

    // A samplermap might change this to something nonsensical.
    const int is_volume = (ttype == TEXTURE_TYPE_VOLUME);
    const int is_cube = (ttype == TEXTURE_TYPE_CUBE);
    if (!is_volume && !is_cube)
        failf(ctx, "%s needs a 3D or Cubemap sampler", opcode);
} // state_texm3x3
7650
// State handler for TEXM3X3: the opcode needs at least Shader Model 1.2;
// the shared TEXM3X3 checks then run with dims == 0.
static void state_TEXM3X3(Context *ctx)
{
    const int sm12_or_later = shader_version_atleast(ctx, 1, 2);
    if (!sm12_or_later)
    {
        fail(ctx, "TEXM3X3 opcode not available in Shader Model 1.1");
    } // if

    state_texm3x3(ctx, "TEXM3X3", 0);
} // state_TEXM3X3
7657
// State handler for TEXM3X3TEX: runs the shared TEXM3X3 checks with
// dims == 3, reporting any errors under the name "TEXM3X3TEX".
static void state_TEXM3X3TEX(Context *ctx)
{
    const char *opname = "TEXM3X3TEX";
    state_texm3x3(ctx, opname, 3);
} // state_TEXM3X3TEX
7662
// State handler for TEXM3X3SPEC: runs the shared TEXM3X3 checks with
// dims == 3, then requires the second source argument to be a constant
// register.
static void state_TEXM3X3SPEC(Context *ctx)
{
    state_texm3x3(ctx, "TEXM3X3SPEC", 3);

    const RegisterType lastarg = ctx->source_args[1].regtype;
    if (lastarg != REG_TYPE_CONST)
    {
        fail(ctx, "TEXM3X3SPEC final arg must be a constant register");
    } // if
} // state_TEXM3X3SPEC
7669
// State handler for TEXM3X3VSPEC: runs the shared TEXM3X3 checks with
// dims == 3, reporting any errors under the name "TEXM3X3VSPEC".
static void state_TEXM3X3VSPEC(Context *ctx)
{
    const char *opname = "TEXM3X3VSPEC";
    state_texm3x3(ctx, opname, 3);
} // state_TEXM3X3VSPEC
7674
7675
// State handler for TEXLD. What gets checked depends on the shader version:
//  - Shader Model 2.0 and later: source modifiers, the sampler operand, the
//    instruction control bits and (before Shader Model 3.0) swizzles.
//  - ps_1_4: nothing yet.
//  - anything earlier: the destination must be a texture register; a 2D
//    sampler and a TEXCOORD attribute are registered under the destination's
//    register number.
static void state_TEXLD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 2, 0))
    {
        if (shader_version_atleast(ctx, 1, 4))
        {
            // !!! FIXME: checks for ps_1_4 version here...
            return;
        } // if

        // !!! FIXME: add (other?) checks for ps_1_1 version here...
        const DestArgInfo *texreg = &ctx->dest_arg;
        const int stage = texreg->regnum;
        if (texreg->regtype != REG_TYPE_TEXTURE)
            fail(ctx, "TEX param must be a texture register");
        add_sampler(ctx, stage, TEXTURE_TYPE_2D, 0);
        add_attribute_register(ctx, REG_TYPE_TEXTURE, stage,
                               MOJOSHADER_USAGE_TEXCOORD, stage, 0xF, 0);
        return;
    } // if

    // Shader Model 2.0 and later from here on.
    const SourceArgInfo *coord = &ctx->source_args[0];
    const SourceArgInfo *samp = &ctx->source_args[1];

    // !!! FIXME: verify texldp restrictions:
    //http://msdn.microsoft.com/en-us/library/bb206221(VS.85).aspx
    // !!! FIXME: ...and texldb, too.
    //http://msdn.microsoft.com/en-us/library/bb206217(VS.85).aspx

    //const RegisterType rt0 = src0->regtype;

    // !!! FIXME: msdn says it has to be temp, but Microsoft's HLSL
    // !!! FIXME:  compiler is generating code that uses oC0 for a dest.
    //if (ctx->dest_arg.regtype != REG_TYPE_TEMP)
    //    fail(ctx, "TEXLD dest must be a temp register");

    // !!! FIXME: this can be an REG_TYPE_INPUT, DCL'd to TEXCOORD.
    //else if ((rt0 != REG_TYPE_TEXTURE) && (rt0 != REG_TYPE_TEMP))
    //    fail(ctx, "TEXLD src0 must be texture or temp register");
    //else

    const int controls_known =
        (ctx->instruction_controls == CONTROL_TEXLD) ||
        (ctx->instruction_controls == CONTROL_TEXLDP) ||
        (ctx->instruction_controls == CONTROL_TEXLDB);

    // Only the first of these operand problems is reported.
    if (coord->src_mod != SRCMOD_NONE)
    {
        fail(ctx, "TEXLD src0 must have no modifiers");
    } // if
    else if (samp->regtype != REG_TYPE_SAMPLER)
    {
        fail(ctx, "TEXLD src1 must be sampler register");
    } // else if
    else if (samp->src_mod != SRCMOD_NONE)
    {
        fail(ctx, "TEXLD src1 must have no modifiers");
    } // else if
    else if (!controls_known)
    {
        fail(ctx, "TEXLD has unknown control bits");
    } // else if

    // Shader Model 3 added swizzle support to this opcode.
    if (!shader_version_atleast(ctx, 3, 0))
    {
        if (!no_swizzle(coord->swizzle))
            fail(ctx, "TEXLD src0 must not swizzle");
        else if (!no_swizzle(samp->swizzle))
            fail(ctx, "TEXLD src1 must not swizzle");
    } // if

    // NOTE(review): this compares the sampler's register *number* against a
    //  texture type; the TEXM3X2TEX/TEXM3X3 checks look the type up in
    //  ctx->samplers instead. Kept as-is; confirm which one was intended.
    if ( ((TextureType) samp->regnum) == TEXTURE_TYPE_CUBE )
        ctx->instruction_count += 3;
} // state_TEXLD
7743
// State handler for TEXLDL: needs Shader Model 3.0 or later and a sampler
// register as src1. When both hold, three extra instruction slots may be
// charged (see the note on the final check).
static void state_TEXLDL(Context *ctx)
{
    if (!shader_version_atleast(ctx, 3, 0))
    {
        fail(ctx, "TEXLDL in version < Shader Model 3.0");
        return;
    } // if

    const SourceArgInfo *samp = &ctx->source_args[1];
    if (samp->regtype != REG_TYPE_SAMPLER)
    {
        fail(ctx, "TEXLDL src1 must be sampler register");
        return;
    } // if

    // NOTE(review): this compares the sampler's register *number* against a
    //  texture type rather than looking the sampler's type up; kept as-is,
    //  confirm the intent.
    if ( ((TextureType) samp->regnum) == TEXTURE_TYPE_CUBE )
        ctx->instruction_count += 3;
} // state_TEXLDL
7756
// State handler for DP2ADD: the third source argument (src2) must use a
// replicate swizzle.
static void state_DP2ADD(Context *ctx)
{
    const SourceArgInfo *addend = &ctx->source_args[2];
    if (!replicate_swizzle(addend->swizzle))
    {
        fail(ctx, "DP2ADD src2 must have replicate swizzle");
    } // if
} // state_DP2ADD
7762
7763
// Lookup table for instruction opcodes...
// Each Instruction describes one opcode. The field order matters: the table
//  below fills these structs in with positional initializers.
typedef struct
{
    const char *opcode_string;  // name used in messages; NULL means the opcode is unknown.
    int slots;  // number of instruction slots this opcode eats.
    MOJOSHADER_shaderType shader_types;  // mask of types that can use opcode.
    args_function parse_args;  // parses the opcode's arguments into the context.
    state_function state;  // optional validation hook; may be 0 (none).
    emit_function emitter[STATICARRAYLEN(profiles)];  // one emitter per profile, indexed by ctx->profileid.
} Instruction;
7774
// These have to be in the right order! This array is indexed by the value
//  of the instruction token.
static const Instruction instructions[] =
{
    // INSTRUCTION_STATE: an entry whose state hook is the function named
    //  state_<op>. The argument parser is parse_args_<a>.
    #define INSTRUCTION_STATE(op, opstr, slots, a, t) { \
        opstr, slots, t, parse_args_##a, state_##op, PROFILE_EMITTERS(op) \
    },

    // INSTRUCTION: same entry layout, but with no state hook (0).
    #define INSTRUCTION(op, opstr, slots, a, t) { \
        opstr, slots, t, parse_args_##a, 0, PROFILE_EMITTERS(op) \
    },

    // The table entries themselves come from re-including the internal
    //  header with MOJOSHADER_DO_INSTRUCTION_TABLE defined (presumably that
    //  header then expands to a list of the two macros above -- confirm there).
    #define MOJOSHADER_DO_INSTRUCTION_TABLE 1
    #include "mojoshader_internal.h"
    #undef MOJOSHADER_DO_INSTRUCTION_TABLE

    #undef INSTRUCTION
    #undef INSTRUCTION_STATE
};
7794
7795
7796 // parse various token types...
7797
// Parse the instruction at the current token position: decode the
//  instruction token, parse its arguments, run the opcode's state hook (if
//  it has one), and call the active profile's emitter unless the context has
//  already failed.
// Returns 0 when the opcode value lies outside the instruction table (not an
//  instruction, or not handled here); otherwise returns a token count for
//  the instruction (see the resync handling at the end).
static int parse_instruction_token(Context *ctx)
{
    int retval = 0;
    // Remember where we started; argument parsing moves the context's token
    //  cursor and it gets put back afterwards.
    const int start_position = ctx->current_position;
    const uint32 *start_tokens = ctx->tokens;
    const uint32 start_tokencount = ctx->tokencount;
    const uint32 token = SWAP32(*(ctx->tokens));
    const uint32 opcode = (token & 0xFFFF);  // bits 0-15.
    const uint32 controls = ((token >> 16) & 0xFF);  // bits 16-23.
    const uint32 insttoks = ((token >> 24) & 0x0F);  // bits 24-27.
    const int coissue = (token & 0x40000000) ? 1 : 0;  // bit 30.
    const int predicated = (token & 0x10000000) ? 1 : 0;  // bit 28.

    if ( opcode >= (sizeof (instructions) / sizeof (instructions[0])) )
        return 0;  // not an instruction token, or just not handled here.

    const Instruction *instruction = &instructions[opcode];
    const emit_function emitter = instruction->emitter[ctx->profileid];

    if ((token & 0x80000000) != 0)
        fail(ctx, "instruction token high bit must be zero.");  // so says msdn.

    if (instruction->opcode_string == NULL)
    {
        fail(ctx, "Unknown opcode.");
        return insttoks + 1;  // pray that you resync later.
    } // if

    // Coissue is only accepted on pixel shaders below Shader Model 2.0.
    ctx->coissue = coissue;
    if (coissue)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "coissue instruction on non-pixel shader");
        if (shader_version_atleast(ctx, 2, 0))
            fail(ctx, "coissue instruction in Shader Model >= 2.0");
    } // if

    if ((ctx->shader_type & instruction->shader_types) == 0)
    {
        failf(ctx, "opcode '%s' not available in this shader type.",
                instruction->opcode_string);
    } // if

    memset(ctx->dwords, '\0', sizeof (ctx->dwords));
    ctx->instruction_controls = controls;
    ctx->predicated = predicated;

    // Update the context with instruction's arguments.
    adjust_token_position(ctx, 1);
    retval = instruction->parse_args(ctx);

    if (predicated)
        retval += parse_predicated_token(ctx);

    // parse_args() moves these forward for convenience...reset them.
    ctx->tokens = start_tokens;
    ctx->tokencount = start_tokencount;
    ctx->current_position = start_position;

    if (instruction->state != NULL)
        instruction->state(ctx);

    // State hooks may already have added extra slots to this counter (the
    //  TEXLD/TEXLDL handlers do); the opcode's base cost is added on top.
    ctx->instruction_count += instruction->slots;

    if (!isfail(ctx))
        emitter(ctx);  // call the profile's emitter.

    // A state hook asked for the recorded TEXM3X2PAD/TEXM3X3PAD registers
    //  to be forgotten once this instruction is done.
    if (ctx->reset_texmpad)
    {
        ctx->texm3x2pad_dst0 = -1;
        ctx->texm3x2pad_src0 = -1;
        ctx->texm3x3pad_dst0 = -1;
        ctx->texm3x3pad_src0 = -1;
        ctx->texm3x3pad_dst1 = -1;
        ctx->texm3x3pad_src1 = -1;
        ctx->reset_texmpad = 0;
    } // if

    ctx->previous_opcode = opcode;
    ctx->scratch_registers = 0;  // reset after every instruction.

    if (!shader_version_atleast(ctx, 2, 0))
    {
        if (insttoks != 0)  // reserved field in shaders < 2.0 ...
            fail(ctx, "instruction token count must be zero");
    } // if
    else
    {
        // From Shader Model 2.0 on, the token's own length field has to
        //  match what was parsed; when it doesn't, the field's value wins
        //  for the return value.
        if (((uint32)retval) != (insttoks+1))
        {
            failf(ctx, "wrong token count (%u, not %u) for opcode '%s'.",
                    (uint) retval, (uint) (insttoks+1),
                    instruction->opcode_string);
            retval = insttoks + 1;  // try to keep sync.
        } // if
    } // else

    return retval;
} // parse_instruction_token
7897
7898
// Parse the version token at the current position: the upper 16 bits select
//  the shader type (0xFFFF pixel, 0xFFFE vertex), the next byte down is the
//  major version and the low byte is the minor version. The results are
//  stored in the context and, unless the context has failed, the profile's
//  start_emitter is called with "profilestr".
// Returns 0 if there are no tokens left, -1 if the shader type isn't
//  recognized, and 1 (one token eaten) otherwise -- including when the
//  version is reported as unsupported.
static int parse_version_token(Context *ctx, const char *profilestr)
{
    if (ctx->tokencount == 0)
    {
        fail(ctx, "Expected version token, got none at all.");
        return 0;
    } // if

    const uint32 vertoken = SWAP32(*(ctx->tokens));
    ctx->version_token = vertoken;

    // 0xFFFF == pixel shader, 0xFFFE == vertex shader
    switch ((vertoken >> 16) & 0xFFFF)
    {
        case 0xFFFF:
            ctx->shader_type = MOJOSHADER_TYPE_PIXEL;
            ctx->shader_type_str = "ps";
            break;

        case 0xFFFE:
            ctx->shader_type = MOJOSHADER_TYPE_VERTEX;
            ctx->shader_type_str = "vs";
            break;

        default:  // geometry shader? Bogus data?
            fail(ctx, "Unsupported shader type or not a shader at all");
            return -1;
    } // switch

    const uint8 major = (uint8) ((vertoken >> 8) & 0xFF);
    const uint8 minor = (uint8) (vertoken & 0xFF);
    ctx->major_ver = major;
    ctx->minor_ver = minor;

    if (!shader_version_supported(major, minor))
    {
        failf(ctx, "Shader Model %u.%u is currently unsupported.",
              (uint) major, (uint) minor);
    } // if

    if (!isfail(ctx))
    {
        ctx->profile->start_emitter(ctx, profilestr);
    } // if

    return 1;  // ate one token.
} // parse_version_token
7945
7946
// Check that a string stored at byte offset "name" inside the buffer
//  [start, start + bytes) is null-terminated before the buffer ends.
// Returns 1 if a terminator was found in bounds, 0 if the offset lies
//  outside the buffer or the string would run off its end.
static int parse_ctab_string(const uint8 *start, const uint32 bytes,
                             const uint32 name)
{
    // An offset at or past the end can't hold even an empty string.
    if (name >= bytes)
        return 0;  // overflowed.

    const char *cursor = (const char *) (start + name);
    int remaining = bytes - name;
    while (remaining-- > 0)
    {
        if (*cursor++ == '\0')
            return 1;  // it's okay.
    } // while

    return 0;  // overflowed.
} // parse_ctab_string
7965
7966
// Worker for parse_ctab_typeinfo(). "depth" counts how many struct levels
//  deep we are; member offsets come straight from the (untrusted) shader
//  data, so a struct that points back at itself would otherwise recurse
//  until the stack runs out.
static int parse_ctab_typeinfo_nested(Context *ctx, const uint8 *start,
                                      const uint32 bytes, const uint32 pos,
                                      MOJOSHADER_symbolTypeInfo *info,
                                      const int depth)
{
    enum { MAX_TYPEINFO_NESTING = 300 };  // far deeper than any sane struct.

    if (depth > MAX_TYPEINFO_NESTING)
        return 0;  // corrupt CTAB (probably a reference cycle).

    // Same test as ((pos + 16) >= bytes), written so that a huge "pos"
    //  can't wrap around and slip past it.
    if ((pos >= bytes) || ((bytes - pos) <= 16))
        return 0;  // corrupt CTAB.

    const uint16 *typeptr = (const uint16 *) (start + pos);

    info->parameter_class = (MOJOSHADER_symbolClass) SWAP16(typeptr[0]);
    info->parameter_type = (MOJOSHADER_symbolType) SWAP16(typeptr[1]);
    info->rows = (unsigned int) SWAP16(typeptr[2]);
    info->columns = (unsigned int) SWAP16(typeptr[3]);
    info->elements = (unsigned int) SWAP16(typeptr[4]);
    info->member_count = (unsigned int) SWAP16(typeptr[5]);

    // Same test as ((pos + 16 + (member_count * 8)) >= bytes), again without
    //  the wraparound. member_count came from 16 bits, so the multiply
    //  can't overflow, and (bytes - pos) > 16 was established above.
    if ((((uint32) info->member_count) * 8) >= ((bytes - pos) - 16))
    {
        // Don't advertise members that never got allocated.
        info->member_count = 0;
        info->members = NULL;
        return 0;  // corrupt CTAB.
    } // if

    if (info->member_count == 0)
        info->members = NULL;
    else
    {
        const size_t len = sizeof (MOJOSHADER_symbolStructMember) *
                           info->member_count;
        info->members = (MOJOSHADER_symbolStructMember *) Malloc(ctx, len);
        if (info->members == NULL)
        {
            info->member_count = 0;  // keep the count and the array in sync.
            return 1;  // we'll check ctx->out_of_memory later.
        } // if
        memset(info->members, '\0', len);
    } // else

    // Each member record is two 32-bit values: the offset of its name and
    //  the offset of its own type info, both relative to "start".
    unsigned int i;
    const uint32 *member = (const uint32 *)((const uint8 *) (&typeptr[6]));
    for (i = 0; i < info->member_count; i++)
    {
        MOJOSHADER_symbolStructMember *mbr = &info->members[i];
        const uint32 name = SWAP32(member[0]);
        const uint32 memberinfopos = SWAP32(member[1]);
        member += 2;

        if (!parse_ctab_string(start, bytes, name))
            return 0;  // info->members will be free()'d elsewhere.

        mbr->name = StrDup(ctx, (const char *) (start + name));
        if (mbr->name == NULL)
            return 1;  // we'll check ctx->out_of_memory later.
        if (!parse_ctab_typeinfo_nested(ctx, start, bytes, memberinfopos,
                                        &mbr->info, depth + 1))
            return 0;
        if (ctx->out_of_memory)
            return 1;  // drop out now.
    } // for

    return 1;
} // parse_ctab_typeinfo_nested


// Fill in "info" from the type description stored at byte offset "pos"
//  inside the CTAB data [start, start + bytes), recursing into struct
//  members.
// Returns 0 if the data is corrupt (out-of-bounds offsets, unterminated
//  names, or nesting that is implausibly deep). Returns 1 otherwise; that
//  includes running out of memory part-way through, so callers have to
//  check ctx->out_of_memory as well.
static int parse_ctab_typeinfo(Context *ctx, const uint8 *start,
                               const uint32 bytes, const uint32 pos,
                               MOJOSHADER_symbolTypeInfo *info)
{
    return parse_ctab_typeinfo_nested(ctx, start, bytes, pos, info, 0);
} // parse_ctab_typeinfo
8021
8022
8023 // Microsoft's tools add a CTAB comment to all shaders. This is the
8024 // "constant table," or specifically: D3DXSHADER_CONSTANTTABLE:
8025 // http://msdn.microsoft.com/en-us/library/bb205440(VS.85).aspx
8026 // This may tell us high-level truths about an otherwise generic low-level
8027 // registers, for instance, how large an array actually is, etc.
parse_constant_table(Context * ctx,const uint32 * tokens,const uint32 bytes,const uint32 okay_version,const int setvariables,CtabData * ctab)8028 static void parse_constant_table(Context *ctx, const uint32 *tokens,
8029 const uint32 bytes, const uint32 okay_version,
8030 const int setvariables, CtabData *ctab)
8031 {
8032 const uint32 id = SWAP32(tokens[1]);
8033 if (id != CTAB_ID)
8034 return; // not the constant table.
8035
8036 assert(ctab->have_ctab == 0); // !!! FIXME: can you have more than one?
8037 ctab->have_ctab = 1;
8038
8039 const uint8 *start = (uint8 *) &tokens[2];
8040
8041 if (bytes < 32)
8042 {
8043 fail(ctx, "Truncated CTAB data");
8044 return;
8045 } // if
8046
8047 const uint32 size = SWAP32(tokens[2]);
8048 const uint32 creator = SWAP32(tokens[3]);
8049 const uint32 version = SWAP32(tokens[4]);
8050 const uint32 constants = SWAP32(tokens[5]);
8051 const uint32 constantinfo = SWAP32(tokens[6]);
8052 const uint32 target = SWAP32(tokens[8]);
8053
8054 if (size != CTAB_SIZE)
8055 goto corrupt_ctab;
8056
8057 if (version != okay_version) goto corrupt_ctab;
8058 if (creator >= bytes) goto corrupt_ctab;
8059 if ((constantinfo + (constants * CINFO_SIZE)) >= bytes) goto corrupt_ctab;
8060 if (target >= bytes) goto corrupt_ctab;
8061 if (!parse_ctab_string(start, bytes, target)) goto corrupt_ctab;
8062 // !!! FIXME: check that (start+target) points to "ps_3_0", etc.
8063
8064 ctab->symbol_count = constants;
8065 ctab->symbols = (MOJOSHADER_symbol *)Malloc(ctx, sizeof (MOJOSHADER_symbol) * constants);
8066 if (ctab->symbols == NULL)
8067 return;
8068 memset(ctab->symbols, '\0', sizeof (MOJOSHADER_symbol) * constants);
8069
8070 uint32 i = 0;
8071 for (i = 0; i < constants; i++)
8072 {
8073 const uint8 *ptr = start + constantinfo + (i * CINFO_SIZE);
8074 const uint32 name = SWAP32(*((uint32 *) (ptr + 0)));
8075 const uint16 regset = SWAP16(*((uint16 *) (ptr + 4)));
8076 const uint16 regidx = SWAP16(*((uint16 *) (ptr + 6)));
8077 const uint16 regcnt = SWAP16(*((uint16 *) (ptr + 8)));
8078 const uint32 typeinf = SWAP32(*((uint32 *) (ptr + 12)));
8079 const uint32 defval = SWAP32(*((uint32 *) (ptr + 16)));
8080 MOJOSHADER_uniformType mojotype = MOJOSHADER_UNIFORM_UNKNOWN;
8081
8082 if (!parse_ctab_string(start, bytes, name)) goto corrupt_ctab;
8083 if (defval >= bytes) goto corrupt_ctab;
8084
8085 switch (regset)
8086 {
8087 case 0: mojotype = MOJOSHADER_UNIFORM_BOOL; break;
8088 case 1: mojotype = MOJOSHADER_UNIFORM_INT; break;
8089 case 2: mojotype = MOJOSHADER_UNIFORM_FLOAT; break;
8090 case 3: /* SAMPLER */ break;
8091 default: goto corrupt_ctab;
8092 } // switch
8093
8094 if ((setvariables) && (mojotype != MOJOSHADER_UNIFORM_UNKNOWN))
8095 {
8096 VariableList *item;
8097 item = (VariableList *) Malloc(ctx, sizeof (VariableList));
8098 if (item != NULL)
8099 {
8100 item->type = mojotype;
8101 item->index = regidx;
8102 item->count = regcnt;
8103 item->constant = NULL;
8104 item->used = 0;
8105 item->emit_position = -1;
8106 item->next = ctx->variables;
8107 ctx->variables = item;
8108 } // if
8109 } // if
8110
8111 // Add the symbol.
8112 const char *namecpy = StrDup(ctx, (const char *) (start + name));
8113 if (namecpy == NULL)
8114 return;
8115
8116 MOJOSHADER_symbol *sym = &ctab->symbols[i];
8117 sym->name = namecpy;
8118 sym->register_set = (MOJOSHADER_symbolRegisterSet) regset;
8119 sym->register_index = (unsigned int) regidx;
8120 sym->register_count = (unsigned int) regcnt;
8121 if (!parse_ctab_typeinfo(ctx, start, bytes, typeinf, &sym->info))
8122 goto corrupt_ctab; // sym->name will get free()'d later.
8123 else if (ctx->out_of_memory)
8124 return; // just bail now.
8125 } // for
8126
8127 return;
8128
8129 corrupt_ctab:
8130 fail(ctx, "Shader has corrupt CTAB data");
8131 } // parse_constant_table
8132
8133
// Forward declaration only; the definition is not in this part of the file.
//  Takes a MOJOSHADER_free callback plus its "d" argument, a symbol array
//  and that array's element count.
static void free_symbols(MOJOSHADER_free f, void *d, MOJOSHADER_symbol *syms,
                         const int symcount);
8136
8137
// Decide whether "tok" is a comment token (low 16 bits equal to 0xFFFE).
//  If it is, bits 16-31 are stored in *tokcount and 1 is returned; a set
//  high bit is reported through fail() but the token still counts as a
//  comment. Otherwise 0 is returned and *tokcount is left untouched.
static int is_comment_token(Context *ctx, const uint32 tok, uint32 *tokcount)
{
    const uint32 swapped = SWAP32(tok);
    const int is_comment = ((swapped & 0xFFFF) == 0xFFFE);

    if (!is_comment)
        return 0;

    if ((swapped & 0x80000000) != 0)
    {
        fail(ctx, "comment token high bit must be zero.");  // so says msdn.
    } // if

    *tokcount = ((swapped >> 16) & 0xFFFF);
    return 1;
} // is_comment_token
8151
8152
// Bookkeeping for one section found inside a preshader comment block.
//  The preshader parser fills one of these in per section type as it scans.
typedef struct PreshaderBlockInfo
{
    const uint32 *tokens;  // first token of the section (its ID token).
    uint32 tokcount;  // number of tokens in the section.
    int seen;  // nonzero once a section of this type has been found.
} PreshaderBlockInfo;
8159
8160 // Preshaders only show up in compiled Effect files. The format is
8161 // undocumented, and even the instructions aren't the same opcodes as you
8162 // would find in a regular shader. These things show up because the HLSL
8163 // compiler can detect work that sets up constant registers that could
8164 // be moved out of the shader itself. Preshaders run once, then the shader
8165 // itself runs many times, using the constant registers the preshader has set
8166 // up. There are cases where the preshaders are 3+ times as many instructions
8167 // as the shader itself, so this can be a big performance win.
8168 // My presumption is that Microsoft's Effects framework runs the preshaders on
8169 // the CPU, then loads the constant register file appropriately before handing
8170 // off to the GPU. As such, we do the same.
parse_preshader(Context * ctx,uint32 tokcount)8171 static void parse_preshader(Context *ctx, uint32 tokcount)
8172 {
8173 const uint32 *tokens = ctx->tokens;
8174 if ((tokcount < 2) || (SWAP32(tokens[1]) != PRES_ID))
8175 return; // not a preshader.
8176
8177 #if !SUPPORT_PRESHADERS
8178 fail(ctx, "Preshader found, but preshader support is disabled!");
8179 #else
8180
8181 assert(ctx->have_preshader == 0); // !!! FIXME: can you have more than one?
8182 ctx->have_preshader = 1;
8183
8184 // !!! FIXME: I don't know what specific versions signify, but we need to
8185 // !!! FIXME: save this to test against the CTAB version field, if
8186 // !!! FIXME: nothing else.
8187 // !!! FIXME: 0x02 0x01 is probably the version (fx_2_1),
8188 // !!! FIXME: and 0x4658 is the magic, like a real shader's version token.
8189 const uint32 okay_version = 0x46580201;
8190 if (SWAP32(tokens[2]) != okay_version)
8191 {
8192 fail(ctx, "Unsupported preshader version.");
8193 return; // fail because the shader will malfunction w/o this.
8194 } // if
8195
8196 tokens += 3;
8197 tokcount -= 3;
8198
8199 // All sections of a preshader are packed into separate comment tokens,
8200 // inside the containing comment token block. Find them all before
8201 // we start, so we don't care about the order they appear in the file.
8202 PreshaderBlockInfo ctab = { 0, 0, 0 };
8203 PreshaderBlockInfo prsi = { 0, 0, 0 };
8204 PreshaderBlockInfo fxlc = { 0, 0, 0 };
8205 PreshaderBlockInfo clit = { 0, 0, 0 };
8206
8207 while (tokcount > 0)
8208 {
8209 uint32 subtokcount = 0;
8210 if ( (!is_comment_token(ctx, *tokens, &subtokcount)) ||
8211 (subtokcount > tokcount) )
8212 {
8213 fail(ctx, "Bogus preshader data.");
8214 return;
8215 } // if
8216
8217 tokens++;
8218 tokcount--;
8219
8220 const uint32 *nexttokens = tokens + subtokcount;
8221 const uint32 nexttokcount = tokcount - subtokcount;
8222
8223 if (subtokcount > 0)
8224 {
8225 switch (SWAP32(*tokens))
8226 {
8227 #define PRESHADER_BLOCK_CASE(id, var) \
8228 case id##_ID: { \
8229 if (var.seen) { \
8230 fail(ctx, "Multiple " #id " preshader blocks."); \
8231 return; \
8232 } \
8233 var.tokens = tokens; \
8234 var.tokcount = subtokcount; \
8235 var.seen = 1; \
8236 break; \
8237 }
8238 PRESHADER_BLOCK_CASE(CTAB, ctab);
8239 PRESHADER_BLOCK_CASE(PRSI, prsi);
8240 PRESHADER_BLOCK_CASE(FXLC, fxlc);
8241 PRESHADER_BLOCK_CASE(CLIT, clit);
8242 default: fail(ctx, "Bogus preshader section."); return;
8243 #undef PRESHADER_BLOCK_CASE
8244 } // switch
8245 } // if
8246
8247 tokens = nexttokens;
8248 tokcount = nexttokcount;
8249 } // while
8250
8251 if (!ctab.seen) { fail(ctx, "No CTAB block in preshader."); return; }
8252 if (!prsi.seen) { fail(ctx, "No PRSI block in preshader."); return; }
8253 if (!fxlc.seen) { fail(ctx, "No FXLC block in preshader."); return; }
8254 if (!clit.seen) { fail(ctx, "No CLIT block in preshader."); return; }
8255
8256 MOJOSHADER_preshader *preshader = (MOJOSHADER_preshader *)
8257 Malloc(ctx, sizeof (MOJOSHADER_preshader));
8258 if (preshader == NULL)
8259 return;
8260 memset(preshader, '\0', sizeof (MOJOSHADER_preshader));
8261 ctx->preshader = preshader;
8262
8263 // Let's set up the constant literals first...
8264 if (clit.tokcount == 0)
8265 fail(ctx, "Bogus CLIT block in preshader.");
8266 else
8267 {
8268 const uint32 lit_count = SWAP32(clit.tokens[1]);
8269 if (lit_count > ((clit.tokcount - 2) / 2))
8270 {
8271 fail(ctx, "Bogus CLIT block in preshader.");
8272 return;
8273 } // if
8274 else if (lit_count > 0)
8275 {
8276 preshader->literal_count = (unsigned int) lit_count;
8277 assert(sizeof (double) == 8); // just in case.
8278 const size_t len = sizeof (double) * lit_count;
8279 preshader->literals = (double *) Malloc(ctx, len);
8280 if (preshader->literals == NULL)
8281 return; // oh well.
8282 const double *litptr = (const double *) (clit.tokens + 2);
8283 int i;
8284 for (i = 0; i < lit_count; i++)
8285 preshader->literals[i] = SWAPDBL(litptr[i]);
8286 } // else if
8287 } // else
8288
8289 // Parse out the PRSI block. This is used to map the output registers.
8290 if (prsi.tokcount < 8)
8291 {
8292 fail(ctx, "Bogus preshader PRSI data");
8293 return;
8294 } // if
8295
8296 //const uint32 first_output_reg = SWAP32(prsi.tokens[1]);
8297 // !!! FIXME: there are a lot of fields here I don't know about.
8298 // !!! FIXME: maybe [2] and [3] are for int4 and bool registers?
8299 //const uint32 output_reg_count = SWAP32(prsi.tokens[4]);
8300 // !!! FIXME: maybe [5] and [6] are for int4 and bool registers?
8301 const uint32 output_map_count = SWAP32(prsi.tokens[7]);
8302
8303 prsi.tokcount -= 8;
8304 prsi.tokens += 8;
8305
8306 if (prsi.tokcount < ((output_map_count + 1) * 2))
8307 {
8308 fail(ctx, "Bogus preshader PRSI data");
8309 return;
8310 } // if
8311
8312 const uint32 *output_map = prsi.tokens;
8313
8314 // Now we'll figure out the CTAB...
8315 CtabData ctabdata = { 0, 0, 0 };
8316 parse_constant_table(ctx, ctab.tokens - 1, ctab.tokcount * 4,
8317 okay_version, 0, &ctabdata);
8318
8319 // preshader owns this now. Don't free it in this function.
8320 preshader->symbol_count = ctabdata.symbol_count;
8321 preshader->symbols = ctabdata.symbols;
8322
8323 if (!ctabdata.have_ctab)
8324 {
8325 fail(ctx, "Bogus preshader CTAB data");
8326 return;
8327 } // if
8328
8329 // The FXLC block has the actual instructions...
8330 uint32 opcode_count = SWAP32(fxlc.tokens[1]);
8331
8332 size_t len = sizeof (MOJOSHADER_preshaderInstruction) * opcode_count;
8333 preshader->instruction_count = (unsigned int) opcode_count;
8334 preshader->instructions = (MOJOSHADER_preshaderInstruction *)
8335 Malloc(ctx, len);
8336 if (preshader->instructions == NULL)
8337 return;
8338 memset(preshader->instructions, '\0', len);
8339
8340 fxlc.tokens += 2;
8341 fxlc.tokcount -= 2;
8342 if (opcode_count > (fxlc.tokcount / 2))
8343 {
8344 fail(ctx, "Bogus preshader FXLC block.");
8345 return;
8346 } // if
8347
8348 MOJOSHADER_preshaderInstruction *inst = preshader->instructions;
8349 while (opcode_count--)
8350 {
8351 const uint32 opcodetok = SWAP32(fxlc.tokens[0]);
8352 MOJOSHADER_preshaderOpcode opcode = MOJOSHADER_PRESHADEROP_NOP;
8353 switch ((opcodetok >> 16) & 0xFFFF)
8354 {
8355 case 0x1000: opcode = MOJOSHADER_PRESHADEROP_MOV; break;
8356 case 0x1010: opcode = MOJOSHADER_PRESHADEROP_NEG; break;
8357 case 0x1030: opcode = MOJOSHADER_PRESHADEROP_RCP; break;
8358 case 0x1040: opcode = MOJOSHADER_PRESHADEROP_FRC; break;
8359 case 0x1050: opcode = MOJOSHADER_PRESHADEROP_EXP; break;
8360 case 0x1060: opcode = MOJOSHADER_PRESHADEROP_LOG; break;
8361 case 0x1070: opcode = MOJOSHADER_PRESHADEROP_RSQ; break;
8362 case 0x1080: opcode = MOJOSHADER_PRESHADEROP_SIN; break;
8363 case 0x1090: opcode = MOJOSHADER_PRESHADEROP_COS; break;
8364 case 0x10A0: opcode = MOJOSHADER_PRESHADEROP_ASIN; break;
8365 case 0x10B0: opcode = MOJOSHADER_PRESHADEROP_ACOS; break;
8366 case 0x10C0: opcode = MOJOSHADER_PRESHADEROP_ATAN; break;
8367 case 0x2000: opcode = MOJOSHADER_PRESHADEROP_MIN; break;
8368 case 0x2010: opcode = MOJOSHADER_PRESHADEROP_MAX; break;
8369 case 0x2020: opcode = MOJOSHADER_PRESHADEROP_LT; break;
8370 case 0x2030: opcode = MOJOSHADER_PRESHADEROP_GE; break;
8371 case 0x2040: opcode = MOJOSHADER_PRESHADEROP_ADD; break;
8372 case 0x2050: opcode = MOJOSHADER_PRESHADEROP_MUL; break;
8373 case 0x2060: opcode = MOJOSHADER_PRESHADEROP_ATAN2; break;
8374 case 0x2080: opcode = MOJOSHADER_PRESHADEROP_DIV; break;
8375 case 0x3000: opcode = MOJOSHADER_PRESHADEROP_CMP; break;
8376 case 0x3010: opcode = MOJOSHADER_PRESHADEROP_MOVC; break;
8377 case 0x5000: opcode = MOJOSHADER_PRESHADEROP_DOT; break;
8378 case 0x5020: opcode = MOJOSHADER_PRESHADEROP_NOISE; break;
8379 case 0xA000: opcode = MOJOSHADER_PRESHADEROP_MIN_SCALAR; break;
8380 case 0xA010: opcode = MOJOSHADER_PRESHADEROP_MAX_SCALAR; break;
8381 case 0xA020: opcode = MOJOSHADER_PRESHADEROP_LT_SCALAR; break;
8382 case 0xA030: opcode = MOJOSHADER_PRESHADEROP_GE_SCALAR; break;
8383 case 0xA040: opcode = MOJOSHADER_PRESHADEROP_ADD_SCALAR; break;
8384 case 0xA050: opcode = MOJOSHADER_PRESHADEROP_MUL_SCALAR; break;
8385 case 0xA060: opcode = MOJOSHADER_PRESHADEROP_ATAN2_SCALAR; break;
8386 case 0xA080: opcode = MOJOSHADER_PRESHADEROP_DIV_SCALAR; break;
8387 case 0xD000: opcode = MOJOSHADER_PRESHADEROP_DOT_SCALAR; break;
8388 case 0xD020: opcode = MOJOSHADER_PRESHADEROP_NOISE_SCALAR; break;
8389 default: fail(ctx, "Unknown preshader opcode."); break;
8390 } // switch
8391
8392 uint32 operand_count = SWAP32(fxlc.tokens[1]) + 1; // +1 for dest.
8393
8394 inst->opcode = opcode;
8395 inst->element_count = (unsigned int) (opcodetok & 0xFF);
8396 inst->operand_count = (unsigned int) operand_count;
8397
8398 fxlc.tokens += 2;
8399 fxlc.tokcount -= 2;
8400 if ((operand_count * 3) > fxlc.tokcount)
8401 {
8402 fail(ctx, "Bogus preshader FXLC block.");
8403 return;
8404 } // if
8405
8406 MOJOSHADER_preshaderOperand *operand = inst->operands;
8407 while (operand_count--)
8408 {
8409 const unsigned int item = (unsigned int) SWAP32(fxlc.tokens[2]);
8410
8411 // !!! FIXME: don't know what first token does.
8412 switch (SWAP32(fxlc.tokens[1]))
8413 {
8414 case 1: // literal from CLIT block.
8415 {
8416 if (item >= preshader->literal_count)
8417 {
8418 fail(ctx, "Bogus preshader literal index.");
8419 break;
8420 } // if
8421 operand->type = MOJOSHADER_PRESHADEROPERAND_LITERAL;
8422 break;
8423 } // case
8424
8425 case 2: // item from ctabdata.
8426 {
8427 int i;
8428 MOJOSHADER_symbol *sym = ctabdata.symbols;
8429 for (i = 0; i < ctabdata.symbol_count; i++, sym++)
8430 {
8431 const uint32 base = sym->register_index * 4;
8432 const uint32 count = sym->register_count * 4;
8433 assert(sym->register_set==MOJOSHADER_SYMREGSET_FLOAT4);
8434 if ( (base <= item) && ((base + count) > item) )
8435 break;
8436 } // for
8437 if (i == ctabdata.symbol_count)
8438 {
8439 fail(ctx, "Bogus preshader input index.");
8440 break;
8441 } // if
8442 operand->type = MOJOSHADER_PRESHADEROPERAND_INPUT;
8443 break;
8444 } // case
8445
8446 case 4:
8447 {
8448 int i;
8449 for (i = 0; i < output_map_count; i++)
8450 {
8451 const uint32 base = output_map[(i*2)] * 4;
8452 const uint32 count = output_map[(i*2)+1] * 4;
8453 if ( (base <= item) && ((base + count) > item) )
8454 break;
8455 } // for
8456 if (i == output_map_count)
8457 {
8458 fail(ctx, "Bogus preshader output index.");
8459 break;
8460 } // if
8461
8462 operand->type = MOJOSHADER_PRESHADEROPERAND_OUTPUT;
8463 break;
8464 } // case
8465
8466 case 7:
8467 {
8468 operand->type = MOJOSHADER_PRESHADEROPERAND_TEMP;
8469 if (item >= preshader->temp_count)
8470 preshader->temp_count = item + 1;
8471 break;
8472 } // case
8473 } // switch
8474
8475 operand->index = item;
8476
8477 fxlc.tokens += 3;
8478 fxlc.tokcount -= 3;
8479 operand++;
8480 } // while
8481
8482 inst++;
8483 } // while
8484 #endif
8485 } // parse_preshader
8486
8487
// Handle a comment token at the current position in the token stream.
//  If the comment's first payload token is PRES_ID, the comment is handed to
//  parse_preshader(); if it is CTAB_ID, it is handed to
//  parse_constant_table() to fill in ctx->ctab. Any other comment is skipped
//  without looking inside it.
// Returns the payload token count plus one (for the comment token itself),
//  or zero if the current token is not a comment token at all.
static int parse_comment_token(Context *ctx)
{
    uint32 payload_toks = 0;

    if (!is_comment_token(ctx, *ctx->tokens, &payload_toks))
        return 0;  // not a comment token.

    // Only peek at the ID token if the comment has a payload and claims to
    //  end before the rest of the stream does.
    if ((payload_toks >= 1) && (payload_toks < ctx->tokencount))
    {
        const uint32 tag = SWAP32(ctx->tokens[1]);

        if (tag == PRES_ID)
        {
            parse_preshader(ctx, payload_toks);
        } // if
        else if (tag == CTAB_ID)
        {
            // size is passed in bytes: four per token.
            parse_constant_table(ctx, ctx->tokens, payload_toks * 4,
                                 ctx->version_token, 1, &ctx->ctab);
        } // else if
    } // if

    return payload_toks + 1;  // comment data plus the initial token.
} // parse_comment_token
8509
8510
// Handle the end-of-shader token (always 0x0000FFFF).
//  Reports a failure if anything follows it in the stream, and runs the
//  profile's end_emitter if the context is not in a failed state.
// Returns 1 (one token eaten) if this was the end token, 0 if it wasn't.
static int parse_end_token(Context *ctx)
{
    const uint32 current = SWAP32(*(ctx->tokens));

    if (current == 0x0000FFFF)
    {
        // we _must_ be the last token in the stream.
        if (ctx->tokencount != 1)
            fail(ctx, "end token before end of stream");

        if (!isfail(ctx))
            ctx->profile->end_emitter(ctx);

        return 1;
    } // if

    return 0;  // not us, eat no tokens.
} // parse_end_token
8524
8525
// Handle the phase token (always 0x0000FFFD).
//  Reports a failure unless this is a version 1.4 pixel shader, and runs the
//  profile's phase_emitter if the context is not in a failed state.
// Returns 1 (one token eaten) if this was the phase token, 0 if it wasn't.
static int parse_phase_token(Context *ctx)
{
    // !!! FIXME: needs state; allow only one phase token per shader, I think?
    const uint32 current = SWAP32(*(ctx->tokens));

    if (current == 0x0000FFFD)
    {
        if (!(shader_is_pixel(ctx) && shader_version_exactly(ctx, 1, 4)))
            fail(ctx, "phase token only available in 1.4 pixel shaders");

        if (!isfail(ctx))
            ctx->profile->phase_emitter(ctx);

        return 1;
    } // if

    return 0;  // not us, eat no tokens.
} // parse_phase_token
8540
8541
// Parse whatever token sits at the current position of the stream.
//  The token is offered, in order, to the comment, end, phase and
//  instruction parsers; the first one that claims it (returns non-zero)
//  decides how many tokens were consumed.
// Returns the number of tokens to advance by. If nothing recognizes the
//  token, or the stream is already exhausted, a failure is reported and 1 is
//  returned so the caller keeps moving.
static int parse_token(Context *ctx)
{
    int rc = 0;

    assert(ctx->output_stack_len == 0);

    if (ctx->tokencount == 0)
    {
        // There is no token to look at, so don't fall into the
        //  "unknown token" report below: it dereferences ctx->tokens,
        //  which would read past the end of the stream (and pile a second,
        //  meaningless error on top of this one).
        fail(ctx, "unexpected end of shader.");
        return 1;
    } // if

    if ((rc = parse_comment_token(ctx)) != 0)
        return rc;

    if ((rc = parse_end_token(ctx)) != 0)
        return rc;

    if ((rc = parse_phase_token(ctx)) != 0)
        return rc;

    if ((rc = parse_instruction_token(ctx)) != 0)
        return rc;

    failf(ctx, "unknown token (0x%x)", (uint) *ctx->tokens);
    return 1;  // good luck!
} // parse_token
8566
8567
find_profile_id(const char * profile)8568 static int find_profile_id(const char *profile)
8569 {
8570 size_t i;
8571 for (i = 0; i < STATICARRAYLEN(profileMap); i++)
8572 {
8573 const char *name = profileMap[i].from;
8574 if (strcmp(name, profile) == 0)
8575 {
8576 profile = profileMap[i].to;
8577 break;
8578 } // if
8579 } // for
8580
8581 for (i = 0; i < STATICARRAYLEN(profiles); i++)
8582 {
8583 const char *name = profiles[i].name;
8584 if (strcmp(name, profile) == 0)
8585 return i;
8586 } // for
8587
8588 return -1; // no match.
8589 } // find_profile_id
8590
8591
build_context(const char * profile,const unsigned char * tokenbuf,const unsigned int bufsize,const MOJOSHADER_swizzle * swiz,const unsigned int swizcount,const MOJOSHADER_samplerMap * smap,const unsigned int smapcount,MOJOSHADER_malloc m,MOJOSHADER_free f,void * d)8592 static Context *build_context(const char *profile,
8593 const unsigned char *tokenbuf,
8594 const unsigned int bufsize,
8595 const MOJOSHADER_swizzle *swiz,
8596 const unsigned int swizcount,
8597 const MOJOSHADER_samplerMap *smap,
8598 const unsigned int smapcount,
8599 MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
8600 {
8601 if (m == NULL) m = MOJOSHADER_internal_malloc;
8602 if (f == NULL) f = MOJOSHADER_internal_free;
8603
8604 Context *ctx = (Context *) m(sizeof (Context), d);
8605 if (ctx == NULL)
8606 return NULL;
8607
8608 memset(ctx, '\0', sizeof (Context));
8609 ctx->malloc = m;
8610 ctx->free = f;
8611 ctx->malloc_data = d;
8612 ctx->tokens = (const uint32 *) tokenbuf;
8613 ctx->orig_tokens = (const uint32 *) tokenbuf;
8614 ctx->tokencount = bufsize / sizeof (uint32);
8615 ctx->swizzles = swiz;
8616 ctx->swizzles_count = swizcount;
8617 ctx->samplermap = smap;
8618 ctx->samplermap_count = smapcount;
8619 ctx->endline = ENDLINE_STR;
8620 ctx->endline_len = strlen(ctx->endline);
8621 ctx->last_address_reg_component = -1;
8622 ctx->current_position = MOJOSHADER_POSITION_BEFORE;
8623 ctx->texm3x2pad_dst0 = -1;
8624 ctx->texm3x2pad_src0 = -1;
8625 ctx->texm3x3pad_dst0 = -1;
8626 ctx->texm3x3pad_src0 = -1;
8627 ctx->texm3x3pad_dst1 = -1;
8628 ctx->texm3x3pad_src1 = -1;
8629
8630 ctx->errors = errorlist_create(MallocBridge, FreeBridge, ctx);
8631 if (ctx->errors == NULL)
8632 {
8633 f(ctx, d);
8634 return NULL;
8635 } // if
8636
8637 if (!set_output(ctx, &ctx->mainline))
8638 {
8639 errorlist_destroy(ctx->errors);
8640 f(ctx, d);
8641 return NULL;
8642 } // if
8643
8644 const int profileid = find_profile_id(profile);
8645 ctx->profileid = profileid;
8646 if (profileid >= 0)
8647 ctx->profile = &profiles[profileid];
8648 else
8649 failf(ctx, "Profile '%s' is unknown or unsupported", profile);
8650
8651 return ctx;
8652 } // build_context
8653
8654
// Free every node of a ConstantsList chain, starting at (item), by handing
//  each node to f() along with the allocator data (d). A NULL list is fine.
static void free_constants_list(MOJOSHADER_free f, void *d, ConstantsList *item)
{
    ConstantsList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // read this before the node goes away.
        f(item, d);
    } // for
} // free_constants_list
8664
8665
// Free every node of a VariableList chain, starting at (item), by handing
//  each node to f() along with the allocator data (d). Only the nodes
//  themselves are released here. A NULL list is fine.
static void free_variable_list(MOJOSHADER_free f, void *d, VariableList *item)
{
    VariableList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // read this before the node goes away.
        f(item, d);
    } // for
} // free_variable_list
8675
8676
// Release what a symbol's type info owns: each member's name, each member's
//  own (nested) type info, and finally the members array. The typeinfo
//  struct itself is not freed; it lives inside whatever contains it.
static void free_sym_typeinfo(MOJOSHADER_free f, void *d,
                              MOJOSHADER_symbolTypeInfo *typeinfo)
{
    int idx = 0;
    while (idx < typeinfo->member_count)
    {
        f((void *) typeinfo->members[idx].name, d);
        free_sym_typeinfo(f, d, &typeinfo->members[idx].info);  // recurse.
        idx++;
    } // while

    f((void *) typeinfo->members, d);
} // free_sym_typeinfo
8688
8689
// Release an array of (symcount) symbols: each symbol's name and type info,
//  then the array itself.
static void free_symbols(MOJOSHADER_free f, void *d, MOJOSHADER_symbol *syms,
                         const int symcount)
{
    int idx = 0;
    while (idx < symcount)
    {
        f((void *) syms[idx].name, d);
        free_sym_typeinfo(f, d, &syms[idx].info);
        idx++;
    } // while

    f((void *) syms, d);
} // free_symbols
8701
8702
// Release a preshader and the things it owns: its literals, its
//  instructions and its symbol table. A NULL preshader is a no-op.
static void free_preshader(MOJOSHADER_free f, void *d,
                           MOJOSHADER_preshader *preshader)
{
    if (preshader == NULL)
        return;  // nothing to do.

    f((void *) preshader->literals, d);
    f((void *) preshader->instructions, d);
    free_symbols(f, d, preshader->symbols, preshader->symbol_count);
    f((void *) preshader, d);
} // free_preshader
8714
8715
// Tear down a Context and everything it still owns: its output buffers,
//  constant/register/variable lists, error list, CTAB symbols and preshader,
//  and finally the Context itself. A NULL context is a no-op.
static void destroy_context(Context *ctx)
{
    if (ctx == NULL)
        return;  // nothing to do.

    MOJOSHADER_free f = ctx->free;
    if (f == NULL)
        f = MOJOSHADER_internal_free;
    void *d = ctx->malloc_data;
    size_t i;

    Buffer *buffers[] = {
        ctx->preflight, ctx->globals, ctx->helpers, ctx->subroutines,
        ctx->mainline_intro, ctx->mainline, ctx->ignore
    };
    for (i = 0; i < STATICARRAYLEN(buffers); i++)
        buffer_destroy(buffers[i]);

    free_constants_list(f, d, ctx->constants);

    // the list heads live inside the Context; only what hangs off of them
    //  gets freed here.
    RegisterList *reglists[] = {
        ctx->used_registers.next, ctx->defined_registers.next,
        ctx->uniforms.next, ctx->attributes.next, ctx->samplers.next
    };
    for (i = 0; i < STATICARRAYLEN(reglists); i++)
        free_reglist(f, d, reglists[i]);

    free_variable_list(f, d, ctx->variables);
    errorlist_destroy(ctx->errors);
    free_symbols(f, d, ctx->ctab.symbols, ctx->ctab.symbol_count);
    free_preshader(f, d, ctx->preshader);

    f(ctx, d);  // must be last: everything above reads from ctx.
} // destroy_context
8742
8743
// Merge the context's output buffers into the final program text, in this
//  order: preflight, globals, helpers, subroutines, mainline_intro, mainline.
//  ctx->ignore is deliberately left out...that's why it's called "ignore".
// Returns whatever buffer_merge() produces, and passes (len) through to it.
static char *build_output(Context *ctx, size_t *len)
{
    Buffer *pieces[6];
    pieces[0] = ctx->preflight;
    pieces[1] = ctx->globals;
    pieces[2] = ctx->helpers;
    pieces[3] = ctx->subroutines;
    pieces[4] = ctx->mainline_intro;
    pieces[5] = ctx->mainline;

    return buffer_merge(pieces, STATICARRAYLEN(pieces), len);
} // build_output
8755
8756
// Ask the current profile for the variable name of a register, identified
//  by the register's type and number.
static inline const char *alloc_varname(Context *ctx, const RegisterList *reg)
{
    const RegisterType rtype = reg->regtype;
    const int rnum = reg->regnum;
    return ctx->profile->get_varname(ctx, rtype, rnum);
} // alloc_varname
8761
8762
8763 // !!! FIXME: this code is sort of hard to follow:
8764 // !!! FIXME: "var->used" only applies to arrays (at the moment, at least,
8765 // !!! FIXME: but this might be buggy at a later time?), and this code
8766 // !!! FIXME: relies on that.
8767 // !!! FIXME: "variables" means "things we found in a CTAB" but it's not
8768 // !!! FIXME: all registers, etc.
8769 // !!! FIXME: "const_array" means an array for d3d "const" registers (c0, c1,
8770 // !!! FIXME: etc), but not a constant array, although they _can_ be.
8771 // !!! FIXME: It's just a mess. :/
build_uniforms(Context * ctx)8772 static MOJOSHADER_uniform *build_uniforms(Context *ctx)
8773 {
8774 const size_t len = sizeof (MOJOSHADER_uniform) * ctx->uniform_count;
8775 MOJOSHADER_uniform *retval = (MOJOSHADER_uniform *) Malloc(ctx, len);
8776
8777 if (retval != NULL)
8778 {
8779 MOJOSHADER_uniform *wptr = retval;
8780 memset(wptr, '\0', len);
8781
8782 VariableList *var;
8783 int written = 0;
8784 for (var = ctx->variables; var != NULL; var = var->next)
8785 {
8786 if (var->used)
8787 {
8788 const char *name = ctx->profile->get_const_array_varname(ctx,
8789 var->index, var->count);
8790 if (name != NULL)
8791 {
8792 wptr->type = MOJOSHADER_UNIFORM_FLOAT;
8793 wptr->index = var->index;
8794 wptr->array_count = var->count;
8795 wptr->constant = (var->constant != NULL) ? 1 : 0;
8796 wptr->name = name;
8797 wptr++;
8798 written++;
8799 } // if
8800 } // if
8801 } // for
8802
8803 RegisterList *item = ctx->uniforms.next;
8804 MOJOSHADER_uniformType type = MOJOSHADER_UNIFORM_FLOAT;
8805 while (written < ctx->uniform_count)
8806 {
8807 int skip = 0;
8808
8809 // !!! FIXME: does this fail if written > ctx->uniform_count?
8810 if (item == NULL)
8811 {
8812 fail(ctx, "BUG: mismatched uniform list and count");
8813 break;
8814 } // if
8815
8816 int index = item->regnum;
8817 switch (item->regtype)
8818 {
8819 case REG_TYPE_CONST:
8820 skip = (item->array != NULL);
8821 type = MOJOSHADER_UNIFORM_FLOAT;
8822 break;
8823
8824 case REG_TYPE_CONSTINT:
8825 type = MOJOSHADER_UNIFORM_INT;
8826 break;
8827
8828 case REG_TYPE_CONSTBOOL:
8829 type = MOJOSHADER_UNIFORM_BOOL;
8830 break;
8831
8832 default:
8833 fail(ctx, "unknown uniform datatype");
8834 break;
8835 } // switch
8836
8837 if (!skip)
8838 {
8839 wptr->type = type;
8840 wptr->index = index;
8841 wptr->array_count = 0;
8842 wptr->name = alloc_varname(ctx, item);
8843 wptr++;
8844 written++;
8845 } // if
8846
8847 item = item->next;
8848 } // for
8849 } // if
8850
8851 return retval;
8852 } // build_uniforms
8853
8854
build_constants(Context * ctx)8855 static MOJOSHADER_constant *build_constants(Context *ctx)
8856 {
8857 const size_t len = sizeof (MOJOSHADER_constant) * ctx->constant_count;
8858 MOJOSHADER_constant *retval = (MOJOSHADER_constant *) Malloc(ctx, len);
8859
8860 if (retval != NULL)
8861 {
8862 ConstantsList *item = ctx->constants;
8863 int i;
8864
8865 for (i = 0; i < ctx->constant_count; i++)
8866 {
8867 if (item == NULL)
8868 {
8869 fail(ctx, "BUG: mismatched constant list and count");
8870 break;
8871 } // if
8872
8873 memcpy(&retval[i], &item->constant, sizeof (MOJOSHADER_constant));
8874 item = item->next;
8875 } // for
8876 } // if
8877
8878 return retval;
8879 } // build_constants
8880
8881
build_samplers(Context * ctx)8882 static MOJOSHADER_sampler *build_samplers(Context *ctx)
8883 {
8884 const size_t len = sizeof (MOJOSHADER_sampler) * ctx->sampler_count;
8885 MOJOSHADER_sampler *retval = (MOJOSHADER_sampler *) Malloc(ctx, len);
8886
8887 if (retval != NULL)
8888 {
8889 RegisterList *item = ctx->samplers.next;
8890 int i;
8891
8892 memset(retval, '\0', len);
8893
8894 for (i = 0; i < ctx->sampler_count; i++)
8895 {
8896 if (item == NULL)
8897 {
8898 fail(ctx, "BUG: mismatched sampler list and count");
8899 break;
8900 } // if
8901
8902 assert(item->regtype == REG_TYPE_SAMPLER);
8903 retval[i].type = cvtD3DToMojoSamplerType((TextureType) item->index);
8904 retval[i].index = item->regnum;
8905 retval[i].name = alloc_varname(ctx, item);
8906 retval[i].texbem = (item->misc != 0) ? 1 : 0;
8907 item = item->next;
8908 } // for
8909 } // if
8910
8911 return retval;
8912 } // build_samplers
8913
8914
build_attributes(Context * ctx,int * _count)8915 static MOJOSHADER_attribute *build_attributes(Context *ctx, int *_count)
8916 {
8917 int count = 0;
8918
8919 if (ctx->attribute_count == 0)
8920 {
8921 *_count = 0;
8922 return NULL; // nothing to do.
8923 } // if
8924
8925 const size_t len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
8926 MOJOSHADER_attribute *retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
8927
8928 if (retval != NULL)
8929 {
8930 RegisterList *item = ctx->attributes.next;
8931 MOJOSHADER_attribute *wptr = retval;
8932 int ignore = 0;
8933 int i;
8934
8935 memset(retval, '\0', len);
8936
8937 for (i = 0; i < ctx->attribute_count; i++)
8938 {
8939 if (item == NULL)
8940 {
8941 fail(ctx, "BUG: mismatched attribute list and count");
8942 break;
8943 } // if
8944
8945 switch (item->regtype)
8946 {
8947 case REG_TYPE_RASTOUT:
8948 case REG_TYPE_ATTROUT:
8949 case REG_TYPE_TEXCRDOUT:
8950 case REG_TYPE_COLOROUT:
8951 case REG_TYPE_DEPTHOUT:
8952 ignore = 1;
8953 break;
8954 case REG_TYPE_TEXTURE:
8955 case REG_TYPE_MISCTYPE:
8956 case REG_TYPE_INPUT:
8957 ignore = shader_is_pixel(ctx);
8958 break;
8959 default:
8960 ignore = 0;
8961 break;
8962 } // switch
8963
8964 if (!ignore)
8965 {
8966 if (shader_is_pixel(ctx))
8967 fail(ctx, "BUG: pixel shader with vertex attributes");
8968 else
8969 {
8970 wptr->usage = item->usage;
8971 wptr->index = item->index;
8972 wptr->name = alloc_varname(ctx, item);
8973 wptr++;
8974 count++;
8975 } // else
8976 } // if
8977
8978 item = item->next;
8979 } // for
8980 } // if
8981
8982 *_count = count;
8983 return retval;
8984 } // build_attributes
8985
build_outputs(Context * ctx,int * _count)8986 static MOJOSHADER_attribute *build_outputs(Context *ctx, int *_count)
8987 {
8988 int count = 0;
8989
8990 if (ctx->attribute_count == 0)
8991 {
8992 *_count = 0;
8993 return NULL; // nothing to do.
8994 } // if
8995
8996 const size_t len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
8997 MOJOSHADER_attribute *retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
8998
8999 if (retval != NULL)
9000 {
9001 RegisterList *item = ctx->attributes.next;
9002 MOJOSHADER_attribute *wptr = retval;
9003 int i;
9004
9005 memset(retval, '\0', len);
9006
9007 for (i = 0; i < ctx->attribute_count; i++)
9008 {
9009 if (item == NULL)
9010 {
9011 fail(ctx, "BUG: mismatched attribute list and count");
9012 break;
9013 } // if
9014
9015 switch (item->regtype)
9016 {
9017 case REG_TYPE_RASTOUT:
9018 case REG_TYPE_ATTROUT:
9019 case REG_TYPE_TEXCRDOUT:
9020 case REG_TYPE_COLOROUT:
9021 case REG_TYPE_DEPTHOUT:
9022 wptr->usage = item->usage;
9023 wptr->index = item->index;
9024 wptr->name = alloc_varname(ctx, item);
9025 wptr++;
9026 count++;
9027 break;
9028 default:
9029 break;
9030 } // switch
9031
9032
9033 item = item->next;
9034 } // for
9035 } // if
9036
9037 *_count = count;
9038 return retval;
9039 } // build_outputs
9040
9041
build_parsedata(Context * ctx)9042 static MOJOSHADER_parseData *build_parsedata(Context *ctx)
9043 {
9044 char *output = NULL;
9045 MOJOSHADER_constant *constants = NULL;
9046 MOJOSHADER_uniform *uniforms = NULL;
9047 MOJOSHADER_attribute *attributes = NULL;
9048 MOJOSHADER_attribute *outputs = NULL;
9049 MOJOSHADER_sampler *samplers = NULL;
9050 MOJOSHADER_swizzle *swizzles = NULL;
9051 MOJOSHADER_error *errors = NULL;
9052 MOJOSHADER_parseData *retval = NULL;
9053 size_t output_len = 0;
9054 int attribute_count = 0;
9055 int output_count = 0;
9056
9057 if (ctx->out_of_memory)
9058 return &MOJOSHADER_out_of_mem_data;
9059
9060 retval = (MOJOSHADER_parseData*) Malloc(ctx, sizeof(MOJOSHADER_parseData));
9061 if (retval == NULL)
9062 return &MOJOSHADER_out_of_mem_data;
9063
9064 memset(retval, '\0', sizeof (MOJOSHADER_parseData));
9065
9066 if (!isfail(ctx))
9067 output = build_output(ctx, &output_len);
9068
9069 if (!isfail(ctx))
9070 constants = build_constants(ctx);
9071
9072 if (!isfail(ctx))
9073 uniforms = build_uniforms(ctx);
9074
9075 if (!isfail(ctx))
9076 attributes = build_attributes(ctx, &attribute_count);
9077
9078 if (!isfail(ctx))
9079 outputs = build_outputs(ctx, &output_count);
9080
9081 if (!isfail(ctx))
9082 samplers = build_samplers(ctx);
9083
9084 const int error_count = errorlist_count(ctx->errors);
9085 errors = errorlist_flatten(ctx->errors);
9086
9087 if (!isfail(ctx))
9088 {
9089 if (ctx->swizzles_count > 0)
9090 {
9091 const int len = ctx->swizzles_count * sizeof (MOJOSHADER_swizzle);
9092 swizzles = (MOJOSHADER_swizzle *) Malloc(ctx, len);
9093 if (swizzles != NULL)
9094 memcpy(swizzles, ctx->swizzles, len);
9095 } // if
9096 } // if
9097
9098 // check again, in case build_output, etc, ran out of memory.
9099 if (isfail(ctx))
9100 {
9101 int i;
9102
9103 Free(ctx, output);
9104 Free(ctx, constants);
9105 Free(ctx, swizzles);
9106
9107 if (uniforms != NULL)
9108 {
9109 for (i = 0; i < ctx->uniform_count; i++)
9110 Free(ctx, (void *) uniforms[i].name);
9111 Free(ctx, uniforms);
9112 } // if
9113
9114 if (attributes != NULL)
9115 {
9116 for (i = 0; i < attribute_count; i++)
9117 Free(ctx, (void *) attributes[i].name);
9118 Free(ctx, attributes);
9119 } // if
9120
9121 if (outputs != NULL)
9122 {
9123 for (i = 0; i < output_count; i++)
9124 Free(ctx, (void *) outputs[i].name);
9125 Free(ctx, outputs);
9126 } // if
9127
9128 if (samplers != NULL)
9129 {
9130 for (i = 0; i < ctx->sampler_count; i++)
9131 Free(ctx, (void *) samplers[i].name);
9132 Free(ctx, samplers);
9133 } // if
9134
9135 if (ctx->out_of_memory)
9136 {
9137 for (i = 0; i < error_count; i++)
9138 {
9139 Free(ctx, (void *) errors[i].filename);
9140 Free(ctx, (void *) errors[i].error);
9141 } // for
9142 Free(ctx, errors);
9143 Free(ctx, retval);
9144 return &MOJOSHADER_out_of_mem_data;
9145 } // if
9146 } // if
9147 else
9148 {
9149 retval->profile = ctx->profile->name;
9150 retval->output = output;
9151 retval->output_len = (int) output_len;
9152 retval->instruction_count = ctx->instruction_count;
9153 retval->shader_type = ctx->shader_type;
9154 retval->major_ver = (int) ctx->major_ver;
9155 retval->minor_ver = (int) ctx->minor_ver;
9156 retval->uniform_count = ctx->uniform_count;
9157 retval->uniforms = uniforms;
9158 retval->constant_count = ctx->constant_count;
9159 retval->constants = constants;
9160 retval->sampler_count = ctx->sampler_count;
9161 retval->samplers = samplers;
9162 retval->attribute_count = attribute_count;
9163 retval->attributes = attributes;
9164 retval->output_count = output_count;
9165 retval->outputs = outputs;
9166 retval->swizzle_count = ctx->swizzles_count;
9167 retval->swizzles = swizzles;
9168 retval->symbol_count = ctx->ctab.symbol_count;
9169 retval->symbols = ctx->ctab.symbols;
9170 retval->preshader = ctx->preshader;
9171
9172 // we don't own these now, retval does.
9173 ctx->ctab.symbols = NULL;
9174 ctx->preshader = NULL;
9175 ctx->ctab.symbol_count = 0;
9176 } // else
9177
9178 retval->error_count = error_count;
9179 retval->errors = errors;
9180 retval->malloc = (ctx->malloc == MOJOSHADER_internal_malloc) ? NULL : ctx->malloc;
9181 retval->free = (ctx->free == MOJOSHADER_internal_free) ? NULL : ctx->free;
9182 retval->malloc_data = ctx->malloc_data;
9183
9184 return retval;
9185 } // build_parsedata
9186
9187
// Once the whole token stream has been parsed, sort out every register the
//  shader used and have the profile emit whatever it needs for each:
//   - used registers that were never defined are either added as
//     attributes, emitted as globals, or moved from ctx->used_registers to
//     the ctx->uniforms list (anything else is reported as a failure);
//   - each used array in ctx->variables is emitted, as a constant array if
//     it has constant data attached and as a uniform array otherwise;
//   - each uniform, sampler and attribute is emitted.
//  Along the way this tallies ctx->uniform_count, the per-type uniform
//  counts, ctx->sampler_count and ctx->attribute_count.
static void process_definitions(Context *ctx)
{
    // !!! FIXME: apparently, pre ps_3_0, sampler registers don't need to be
    // !!! FIXME:  DCL'd before use (default to 2d?). We aren't checking
    // !!! FIXME:  this at the moment, though.

    determine_constants_arrays(ctx);  // in case this hasn't been called yet.

    RegisterList *uitem = &ctx->uniforms;
    RegisterList *prev = &ctx->used_registers;
    RegisterList *item = prev->next;

    while (item != NULL)
    {
        RegisterList *next = item->next;
        const RegisterType regtype = item->regtype;
        const int regnum = item->regnum;

        if (!get_defined_register(ctx, regtype, regnum))
        {
            // haven't already dealt with this one.
            switch (regtype)
            {
                // !!! FIXME: I'm not entirely sure this is right...
                case REG_TYPE_RASTOUT:
                case REG_TYPE_ATTROUT:
                case REG_TYPE_TEXCRDOUT:
                case REG_TYPE_COLOROUT:
                case REG_TYPE_DEPTHOUT:
                    if (shader_is_vertex(ctx)&&shader_version_atleast(ctx,3,0))
                    {
                        fail(ctx, "vs_3 can't use output registers"
                                  " without declaring them first.");
                        return;
                    } // if

                    // Apparently this is an attribute that wasn't DCL'd.
                    //  Add it to the attribute list; deal with it later.
                    add_attribute_register(ctx, regtype, regnum,
                                           MOJOSHADER_USAGE_UNKNOWN, 0, 0xF, 0);
                    break;

                case REG_TYPE_ADDRESS:
                case REG_TYPE_PREDICATE:
                case REG_TYPE_TEMP:
                case REG_TYPE_LOOP:
                case REG_TYPE_LABEL:
                    ctx->profile->global_emitter(ctx, regtype, regnum);
                    break;

                case REG_TYPE_CONST:
                case REG_TYPE_CONSTINT:
                case REG_TYPE_CONSTBOOL:
                    // separate uniforms into a different list for now.
                    //  Unlink from used_registers, append to the uniforms
                    //  list, and step back to (prev) so the bookkeeping
                    //  after the switch leaves (prev) where it was.
                    prev->next = next;
                    item->next = NULL;
                    uitem->next = item;
                    uitem = item;
                    item = prev;
                    break;

                case REG_TYPE_INPUT:
                    // You don't have to dcl_ your inputs in Shader Model 1.
                    if (shader_is_pixel(ctx)&&!shader_version_atleast(ctx,2,0))
                    {
                        add_attribute_register(ctx, regtype, regnum,
                                               MOJOSHADER_USAGE_COLOR, regnum,
                                               0xF, 0);
                        break;
                    } // if
                    // fall through...

                default:
                    fail(ctx, "BUG: we used a register we don't know how to define.");
            } // switch
        } // if

        prev = item;
        item = next;
    } // while

    // okay, now deal with uniform/constant arrays...
    VariableList *var;
    for (var = ctx->variables; var != NULL; var = var->next)
    {
        if (var->used)
        {
            if (var->constant)
            {
                ctx->profile->const_array_emitter(ctx, var->constant,
                                                  var->index, var->count);
            } // if
            else
            {
                ctx->profile->array_emitter(ctx, var);
                ctx->uniform_float4_count += var->count;
            } // else

            // Every used array gets a slot in the uniform array that
            //  build_uniforms() fills in, constant or not (constant ones
            //  are just flagged as such there), so count both kinds.
            ctx->uniform_count++;
        } // if
    } // for

    // ...and uniforms...
    for (item = ctx->uniforms.next; item != NULL; item = item->next)
    {
        int arraysize = -1;

        // Start fresh for each uniform, so a register that isn't searched
        //  for below (int/bool constants) doesn't inherit the array that
        //  some earlier register happened to match.
        var = NULL;

        // check if this is a register contained in an array...
        if (item->regtype == REG_TYPE_CONST)
        {
            for (var = ctx->variables; var != NULL; var = var->next)
            {
                if (!var->used)
                    continue;

                const int regnum = item->regnum;
                const int lo = var->index;
                if ( (regnum >= lo) && (regnum < (lo + var->count)) )
                {
                    assert(!var->constant);
                    item->array = var;  // used when building parseData.
                    arraysize = var->count;
                    break;
                } // if
            } // for
        } // if

        // (var) is the array containing this register, or NULL.
        ctx->profile->uniform_emitter(ctx, item->regtype, item->regnum, var);

        if (arraysize < 0)  // not part of an array?
        {
            ctx->uniform_count++;
            switch (item->regtype)
            {
                case REG_TYPE_CONST: ctx->uniform_float4_count++; break;
                case REG_TYPE_CONSTINT: ctx->uniform_int4_count++; break;
                case REG_TYPE_CONSTBOOL: ctx->uniform_bool_count++; break;
                default: break;
            } // switch
        } // if
    } // for

    // ...and samplers...
    for (item = ctx->samplers.next; item != NULL; item = item->next)
    {
        ctx->sampler_count++;
        ctx->profile->sampler_emitter(ctx, item->regnum,
                                      (TextureType) item->index,
                                      item->misc != 0);
    } // for

    // ...and attributes...
    for (item = ctx->attributes.next; item != NULL; item = item->next)
    {
        ctx->attribute_count++;
        ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
                                        item->usage, item->index,
                                        item->writemask, item->misc);
    } // for
} // process_definitions
9347
9348
// Sanity-check the caller-supplied swizzle table: each of the four
//  components of every entry must be in the range 0 to 3. Reports a single
//  failure, at the first bad component found, and stops looking.
static void verify_swizzles(Context *ctx)
{
    size_t entry;
    for (entry = 0; entry < ctx->swizzles_count; entry++)
    {
        const MOJOSHADER_swizzle *swiz = &ctx->swizzles[entry];
        int component;
        for (component = 0; component < 4; component++)
        {
            if (swiz->swizzles[component] > 3)
            {
                fail(ctx, "invalid swizzle");
                return;
            } // if
        } // for
    } // for
} // verify_swizzles
9362
9363
9364 // API entry point...
9365
9366 // !!! FIXME:
9367 // MSDN: "Shader validation will fail CreatePixelShader on any shader that
9368 // attempts to read from a temporary register that has not been written by a
9369 // previous instruction." (true for ps_1_*, maybe others). Check this.
9370
MOJOSHADER_parse(const char * profile,const unsigned char * tokenbuf,const unsigned int bufsize,const MOJOSHADER_swizzle * swiz,const unsigned int swizcount,const MOJOSHADER_samplerMap * smap,const unsigned int smapcount,MOJOSHADER_malloc m,MOJOSHADER_free f,void * d)9371 const MOJOSHADER_parseData *MOJOSHADER_parse(const char *profile,
9372 const unsigned char *tokenbuf,
9373 const unsigned int bufsize,
9374 const MOJOSHADER_swizzle *swiz,
9375 const unsigned int swizcount,
9376 const MOJOSHADER_samplerMap *smap,
9377 const unsigned int smapcount,
9378 MOJOSHADER_malloc m,
9379 MOJOSHADER_free f, void *d)
9380 {
9381 MOJOSHADER_parseData *retval = NULL;
9382 Context *ctx = NULL;
9383 int rc = 0;
9384 int failed = 0;
9385
9386 if ( ((m == NULL) && (f != NULL)) || ((m != NULL) && (f == NULL)) )
9387 return &MOJOSHADER_out_of_mem_data; // supply both or neither.
9388
9389 ctx = build_context(profile, tokenbuf, bufsize, swiz, swizcount,
9390 smap, smapcount, m, f, d);
9391 if (ctx == NULL)
9392 return &MOJOSHADER_out_of_mem_data;
9393
9394 if (isfail(ctx))
9395 {
9396 retval = build_parsedata(ctx);
9397 destroy_context(ctx);
9398 return retval;
9399 } // if
9400
9401 verify_swizzles(ctx);
9402
9403 // Version token always comes first.
9404 ctx->current_position = 0;
9405 rc = parse_version_token(ctx, profile);
9406
9407 // drop out now if this definitely isn't bytecode. Saves lots of
9408 // meaningless errors flooding through.
9409 if (rc < 0)
9410 {
9411 retval = build_parsedata(ctx);
9412 destroy_context(ctx);
9413 return retval;
9414 } // if
9415
9416 if ( ((uint32) rc) > ctx->tokencount )
9417 {
9418 fail(ctx, "Corrupted or truncated shader");
9419 ctx->tokencount = rc;
9420 } // if
9421
9422 adjust_token_position(ctx, rc);
9423
9424 // parse out the rest of the tokens after the version token...
9425 while (ctx->tokencount > 0)
9426 {
9427 // reset for each token.
9428 if (isfail(ctx))
9429 {
9430 failed = 1;
9431 ctx->isfail = 0;
9432 } // if
9433
9434 rc = parse_token(ctx);
9435 if ( ((uint32) rc) > ctx->tokencount )
9436 {
9437 fail(ctx, "Corrupted or truncated shader");
9438 break;
9439 } // if
9440
9441 adjust_token_position(ctx, rc);
9442 } // while
9443
9444 ctx->current_position = MOJOSHADER_POSITION_AFTER;
9445
9446 // for ps_1_*, the output color is written to r0...throw an
9447 // error if this register was never written. This isn't
9448 // important for vertex shaders, or shader model 2+.
9449 if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
9450 {
9451 if (!register_was_written(ctx, REG_TYPE_TEMP, 0))
9452 fail(ctx, "r0 (pixel shader 1.x color output) never written to");
9453 } // if
9454
9455 if (!failed)
9456 {
9457 process_definitions(ctx);
9458 failed = isfail(ctx);
9459 } // if
9460
9461 if (!failed)
9462 ctx->profile->finalize_emitter(ctx);
9463
9464 ctx->isfail = failed;
9465 retval = build_parsedata(ctx);
9466 destroy_context(ctx);
9467 return retval;
9468 } // MOJOSHADER_parse
9469
9470
MOJOSHADER_freeParseData(const MOJOSHADER_parseData * _data)9471 void MOJOSHADER_freeParseData(const MOJOSHADER_parseData *_data)
9472 {
9473 MOJOSHADER_parseData *data = (MOJOSHADER_parseData *) _data;
9474 if ((data == NULL) || (data == &MOJOSHADER_out_of_mem_data))
9475 return; // no-op.
9476
9477 MOJOSHADER_free f = (data->free == NULL) ? MOJOSHADER_internal_free : data->free;
9478 void *d = data->malloc_data;
9479 int i;
9480
9481 // we don't f(data->profile), because that's internal static data.
9482
9483 f((void *) data->output, d);
9484 f((void *) data->constants, d);
9485 f((void *) data->swizzles, d);
9486
9487 for (i = 0; i < data->error_count; i++)
9488 {
9489 f((void *) data->errors[i].error, d);
9490 f((void *) data->errors[i].filename, d);
9491 } // for
9492 f((void *) data->errors, d);
9493
9494 for (i = 0; i < data->uniform_count; i++)
9495 f((void *) data->uniforms[i].name, d);
9496 f((void *) data->uniforms, d);
9497
9498 for (i = 0; i < data->attribute_count; i++)
9499 f((void *) data->attributes[i].name, d);
9500 f((void *) data->attributes, d);
9501
9502 for (i = 0; i < data->output_count; i++)
9503 f((void *) data->outputs[i].name, d);
9504 f((void *) data->outputs, d);
9505
9506 for (i = 0; i < data->sampler_count; i++)
9507 f((void *) data->samplers[i].name, d);
9508 f((void *) data->samplers, d);
9509
9510 free_symbols(f, d, data->symbols, data->symbol_count);
9511 free_preshader(f, d, data->preshader);
9512
9513 f(data, d);
9514 } // MOJOSHADER_freeParseData
9515
9516
MOJOSHADER_version(void)9517 int MOJOSHADER_version(void)
9518 {
9519 return MOJOSHADER_VERSION;
9520 } // MOJOSHADER_version
9521
9522
MOJOSHADER_changeset(void)9523 const char *MOJOSHADER_changeset(void)
9524 {
9525 return MOJOSHADER_CHANGESET;
9526 } // MOJOSHADER_changeset
9527
9528
MOJOSHADER_maxShaderModel(const char * profile)9529 int MOJOSHADER_maxShaderModel(const char *profile)
9530 {
9531 #define PROFILE_SHADER_MODEL(p,v) if (strcmp(profile, p) == 0) return v;
9532 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_D3D, 3);
9533 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_BYTECODE, 3);
9534 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL, 3);
9535 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL120, 3);
9536 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_ARB1, 2);
9537 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV2, 2);
9538 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV3, 2);
9539 PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV4, 3);
9540 #undef PROFILE_SHADER_MODEL
9541 return -1; // unknown profile?
9542 } // MOJOSHADER_maxShaderModel
9543
9544 // end of mojoshader.c ...
9545
9546