1 // shader.cpp: OpenGL assembly/GLSL shader management
2 
3 #include "engine.h"
4 
5 Shader *Shader::lastshader = NULL;
6 
7 Shader *defaultshader = NULL, *rectshader = NULL, *notextureshader = NULL, *nocolorshader = NULL, *foggedshader = NULL, *foggednotextureshader = NULL, *stdworldshader = NULL;
8 
9 static hashtable<const char *, Shader> shaders;
10 static Shader *curshader = NULL;
11 static vector<ShaderParam> curparams;
12 static ShaderParamState vertexparamstate[RESERVEDSHADERPARAMS + MAXSHADERPARAMS], pixelparamstate[RESERVEDSHADERPARAMS + MAXSHADERPARAMS];
13 static bool dirtyenvparams = false, standardshader = false, initshaders = false, forceshaders = true;
14 
15 VAR(reservevpparams, 1, 16, 0);
16 VAR(maxvpenvparams, 1, 0, 0);
17 VAR(maxvplocalparams, 1, 0, 0);
18 VAR(maxfpenvparams, 1, 0, 0);
19 VAR(maxfplocalparams, 1, 0, 0);
20 
loadshaders()21 void loadshaders()
22 {
23     if(renderpath!=R_FIXEDFUNCTION)
24     {
25         GLint val;
26         glGetProgramiv_(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_ENV_PARAMETERS_ARB, &val);
27         maxvpenvparams = val;
28         glGetProgramiv_(GL_VERTEX_PROGRAM_ARB, GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB, &val);
29         maxvplocalparams = val;
30         glGetProgramiv_(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_ENV_PARAMETERS_ARB, &val);
31         maxfpenvparams = val;
32         glGetProgramiv_(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB, &val);
33         maxfplocalparams = val;
34     }
35 
36     initshaders = true;
37     standardshader = true;
38     execfile("stdshader.cfg");
39     standardshader = false;
40     initshaders = false;
41 
42     defaultshader = lookupshaderbyname("default");
43     stdworldshader = lookupshaderbyname("stdworld");
44     if(!defaultshader || !stdworldshader) fatal("cannot find shader definitions");
45 
46     extern Slot dummyslot;
47     dummyslot.shader = stdworldshader;
48 
49     rectshader = lookupshaderbyname("rect");
50     notextureshader = lookupshaderbyname("notexture");
51     nocolorshader = lookupshaderbyname("nocolor");
52     foggedshader = lookupshaderbyname("fogged");
53     foggednotextureshader = lookupshaderbyname("foggednotexture");
54 
55     if(renderpath!=R_FIXEDFUNCTION)
56     {
57         glEnable(GL_VERTEX_PROGRAM_ARB);
58         glEnable(GL_FRAGMENT_PROGRAM_ARB);
59     }
60 
61     defaultshader->set();
62 }
63 
lookupshaderbyname(const char * name)64 Shader *lookupshaderbyname(const char *name)
65 {
66     Shader *s = shaders.access(name);
67     return s && s->detailshader ? s : NULL;
68 }
69 
compileasmshader(GLenum type,GLuint & idx,const char * def,const char * tname,const char * name,bool msg=true,bool nativeonly=false)70 static bool compileasmshader(GLenum type, GLuint &idx, const char *def, const char *tname, const char *name, bool msg = true, bool nativeonly = false)
71 {
72 	glGenPrograms_(1, &idx);
73 	glBindProgram_(type, idx);
74 	def += strspn(def, " \t\r\n");
75 	glProgramString_(type, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(def), def);
76     GLint err = -1, native = 1;
77     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &err);
78     extern int apple_vp_bug;
79     if(type!=GL_VERTEX_PROGRAM_ARB || !apple_vp_bug)
80         glGetProgramiv_(type, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native);
81 	if(msg && err!=-1)
82 	{
83 		conoutf("\frCOMPILE ERROR (%s:%s) - %s", tname, name, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
84         if(err>=0 && err<(int)strlen(def))
85         {
86 			loopi(err) putchar(*def++);
87 			puts(" <<HERE>> ");
88 			while(*def) putchar(*def++);
89 		}
90     }
91     else if(msg && !native) conoutf("\fr%s:%s EXCEEDED NATIVE LIMITS", tname, name);
92     glBindProgram_(type, 0);
93     if(err!=-1 || (!native && nativeonly))
94 	{
95 		glDeletePrograms_(1, &idx);
96 		idx = 0;
97 	}
98     return native!=0;
99 }
100 
showglslinfo(GLhandleARB obj,const char * tname,const char * name)101 static void showglslinfo(GLhandleARB obj, const char *tname, const char *name)
102 {
103 	GLint length = 0;
104 	glGetObjectParameteriv_(obj, GL_OBJECT_INFO_LOG_LENGTH_ARB, &length);
105 	if(length > 1)
106 	{
107 		GLcharARB *log = new GLcharARB[length];
108 		glGetInfoLog_(obj, length, &length, log);
109 		conoutf("\frGLSL ERROR (%s:%s)", tname, name);
110 		puts(log);
111 		delete[] log;
112 	}
113 }
114 
compileglslshader(GLenum type,GLhandleARB & obj,const char * def,const char * tname,const char * name,bool msg=true)115 static void compileglslshader(GLenum type, GLhandleARB &obj, const char *def, const char *tname, const char *name, bool msg = true)
116 {
117 	const GLcharARB *source = (const GLcharARB*)(def + strspn(def, " \t\r\n"));
118 	obj = glCreateShaderObject_(type);
119 	glShaderSource_(obj, 1, &source, NULL);
120 	glCompileShader_(obj);
121 	GLint success;
122 	glGetObjectParameteriv_(obj, GL_OBJECT_COMPILE_STATUS_ARB, &success);
123 	if(!success)
124 	{
125         if(msg) showglslinfo(obj, tname, name);
126 		glDeleteObject_(obj);
127 		obj = 0;
128 	}
129 }
130 
linkglslprogram(Shader & s,bool msg=true)131 static void linkglslprogram(Shader &s, bool msg = true)
132 {
133     s.program = s.vsobj && s.psobj ? glCreateProgramObject_() : 0;
134     GLint success = 0;
135     if(s.program)
136 	{
137 		glAttachObject_(s.program, s.vsobj);
138 		glAttachObject_(s.program, s.psobj);
139 		glLinkProgram_(s.program);
140 		glGetObjectParameteriv_(s.program, GL_OBJECT_LINK_STATUS_ARB, &success);
141 	}
142 	if(success)
143 	{
144 		glUseProgramObject_(s.program);
145 		loopi(8)
146 		{
147 			defformatstring(arg)("tex%d", i);
148 			GLint loc = glGetUniformLocation_(s.program, arg);
149 			if(loc != -1) glUniform1i_(loc, i);
150 		}
151 		loopv(s.defaultparams)
152 		{
153 			ShaderParam &param = s.defaultparams[i];
154 			string pname;
155 			if(param.type==SHPARAM_UNIFORM) copystring(pname, param.name);
156 			else formatstring(pname)("%s%d", param.type==SHPARAM_VERTEX ? "v" : "p", param.index);
157 			param.loc = glGetUniformLocation_(s.program, pname);
158 		}
159 		glUseProgramObject_(0);
160 	}
161     else if(s.program)
162     {
163         if(msg) showglslinfo(s.program, "PROG", s.name);
164         glDeleteObject_(s.program);
165         s.program = 0;
166     }
167 }
168 
checkglslsupport()169 bool checkglslsupport()
170 {
171 	/* check if GLSL profile supports loops
172 	 * might need to rewrite this if compiler does strength reduction
173 	 */
174 	const GLcharARB *source =
175 		"uniform int N;\n"
176 		"uniform vec4 delta;\n"
177 		"void main(void) {\n"
178 		"	vec4 test = vec4(0.0, 0.0, 0.0, 0.0);\n"
179 		"	for(int i = 0; i < N; i++)  test += delta;\n"
180 		"	gl_FragColor = test;\n"
181 		"}\n";
182 	GLhandleARB obj = glCreateShaderObject_(GL_FRAGMENT_SHADER_ARB);
183 	if(!obj) return false;
184 	glShaderSource_(obj, 1, &source, NULL);
185 	glCompileShader_(obj);
186 	GLint success;
187 	glGetObjectParameteriv_(obj, GL_OBJECT_COMPILE_STATUS_ARB, &success);
188 	if(!success)
189 	{
190 		glDeleteObject_(obj);
191 		return false;
192 	}
193 	GLhandleARB program = glCreateProgramObject_();
194 	if(!program)
195 	{
196 		glDeleteObject_(obj);
197 		return false;
198 	}
199 	glAttachObject_(program, obj);
200 	glLinkProgram_(program);
201 	glGetObjectParameteriv_(program, GL_OBJECT_LINK_STATUS_ARB, &success);
202 	glDeleteObject_(obj);
203 	glDeleteObject_(program);
204 	return success!=0;
205 }
206 
// Sentinels stored in Shader::extvertparams/extpixparams instead of an index into Shader::extparams.
// ALLOCEXTPARAM: the uniform still has to be looked up; flushparam() calls allocglsluniformparam() when it sees this value.
// UNUSEDEXTPARAM: the lookup for a local parameter found no uniform, so the entry is skipped from then on.
#define ALLOCEXTPARAM 0xFF
#define UNUSEDEXTPARAM 0xFE
209 
allocglsluniformparam(Shader & s,int type,int index,bool local=false)210 static void allocglsluniformparam(Shader &s, int type, int index, bool local = false)
211 {
212     ShaderParamState &val = (type==SHPARAM_VERTEX ? vertexparamstate[index] : pixelparamstate[index]);
213     int loc = val.name ? glGetUniformLocation_(s.program, val.name) : -1;
214     if(loc == -1)
215     {
216         defformatstring(altname)("%s%d", type==SHPARAM_VERTEX ? "v" : "p", index);
217         loc = glGetUniformLocation_(s.program, val.name);
218     }
219     else
220     {
221         uchar alt = (type==SHPARAM_VERTEX ? s.extpixparams[index] : s.extvertparams[index]);
222         if(alt < RESERVEDSHADERPARAMS && s.extparams[alt].loc == loc)
223         {
224             if(type==SHPARAM_VERTEX) s.extvertparams[index] = alt;
225             else s.extpixparams[index] = alt;
226             return;
227         }
228     }
229     if(loc == -1)
230     {
231         if(type==SHPARAM_VERTEX) s.extvertparams[index] = local ? UNUSEDEXTPARAM : ALLOCEXTPARAM;
232         else s.extpixparams[index] = local ? UNUSEDEXTPARAM : ALLOCEXTPARAM;
233         return;
234     }
235     if(!(s.numextparams%4))
236     {
237         LocalShaderParamState *extparams = new LocalShaderParamState[s.numextparams+4];
238         if(s.extparams)
239         {
240             memcpy(extparams, s.extparams, s.numextparams*sizeof(LocalShaderParamState));
241             delete[] s.extparams;
242         }
243         s.extparams = extparams;
244     }
245     LocalShaderParamState &ext = s.extparams[s.numextparams];
246     ext.name = val.name;
247     ext.type = type;
248     ext.index = local ? -1 : index;
249     ext.loc = loc;
250     if(type==SHPARAM_VERTEX) s.extvertparams[index] = s.numextparams;
251     else s.extpixparams[index] = s.numextparams;
252     s.numextparams++;
253 }
254 
allocenvparams(Slot * slot)255 void Shader::allocenvparams(Slot *slot)
256 {
257 	if(!(type & SHADER_GLSLANG)) return;
258 
259 	if(slot)
260 	{
261 #define UNIFORMTEX(name, tmu) \
262 		{ \
263 			loc = glGetUniformLocation_(program, name); \
264 			int val = tmu; \
265 			if(loc != -1) glUniform1i_(loc, val); \
266 		}
267 		int loc, tmu = 2;
268 		if(type & SHADER_NORMALSLMS)
269 		{
270 			UNIFORMTEX("lmcolor", 1);
271 			UNIFORMTEX("lmdir", 2);
272 			tmu++;
273 		}
274 		else UNIFORMTEX("lightmap", 1);
275 		if(type & SHADER_ENVMAP) UNIFORMTEX("envmap", tmu++);
276         UNIFORMTEX("shadowmap", 7);
277 		int stex = 0;
278 		loopv(slot->sts)
279 		{
280 			Slot::Tex &t = slot->sts[i];
281 			switch(t.type)
282 			{
283 				case TEX_DIFFUSE: UNIFORMTEX("diffusemap", 0); break;
284 				case TEX_NORMAL: UNIFORMTEX("normalmap", tmu++); break;
285 				case TEX_GLOW: UNIFORMTEX("glowmap", tmu++); break;
286 				case TEX_DECAL: UNIFORMTEX("decal", tmu++); break;
287 				case TEX_SPEC: if(t.combined<0) UNIFORMTEX("specmap", tmu++); break;
288 				case TEX_DEPTH: if(t.combined<0) UNIFORMTEX("depthmap", tmu++); break;
289 				case TEX_UNKNOWN:
290 				{
291 					defformatstring(sname)("stex%d", stex++);
292 					UNIFORMTEX(sname, tmu++);
293 					break;
294 				}
295 			}
296 		}
297 	}
298     if(!extvertparams)
299     {
300         extvertparams = new uchar[2*RESERVEDSHADERPARAMS];
301         extpixparams = extvertparams + RESERVEDSHADERPARAMS;
302     }
303     memset(extvertparams, ALLOCEXTPARAM, 2*RESERVEDSHADERPARAMS);
304 	loopi(RESERVEDSHADERPARAMS) if(vertexparamstate[i].name && !vertexparamstate[i].local)
305 		allocglsluniformparam(*this, SHPARAM_VERTEX, i);
306 	loopi(RESERVEDSHADERPARAMS) if(pixelparamstate[i].name && !pixelparamstate[i].local)
307 		allocglsluniformparam(*this, SHPARAM_PIXEL, i);
308 }
309 
flushparam(int type,int index)310 static inline void flushparam(int type, int index)
311 {
312     ShaderParamState &val = (type==SHPARAM_VERTEX ? vertexparamstate[index] : pixelparamstate[index]);
313     if(Shader::lastshader && Shader::lastshader->type&SHADER_GLSLANG)
314     {
315         uchar &extindex = (type==SHPARAM_VERTEX ? Shader::lastshader->extvertparams[index] : Shader::lastshader->extpixparams[index]);
316         if(extindex == ALLOCEXTPARAM) allocglsluniformparam(*Shader::lastshader, type, index, val.local);
317         if(extindex >= RESERVEDSHADERPARAMS) return;
318         LocalShaderParamState &ext = Shader::lastshader->extparams[extindex];
319         if(!memcmp(ext.curval, val.val, sizeof(ext.curval))) return;
320         memcpy(ext.curval, val.val, sizeof(ext.curval));
321         glUniform4fv_(ext.loc, 1, ext.curval);
322     }
323     else if(val.dirty==ShaderParamState::DIRTY)
324     {
325         glProgramEnvParameter4fv_(type==SHPARAM_VERTEX ? GL_VERTEX_PROGRAM_ARB : GL_FRAGMENT_PROGRAM_ARB, index, val.val);
326         val.dirty = ShaderParamState::CLEAN;
327     }
328 }
329 
setparamf(const char * name,int type,int index,float x,float y,float z,float w)330 static inline ShaderParamState &setparamf(const char *name, int type, int index, float x, float y, float z, float w)
331 {
332     ShaderParamState &val = (type==SHPARAM_VERTEX ? vertexparamstate[index] : pixelparamstate[index]);
333     val.name = name;
334     if(val.dirty==ShaderParamState::INVALID || val.val[0]!=x || val.val[1]!=y || val.val[2]!=z || val.val[3]!=w)
335     {
336         val.val[0] = x;
337         val.val[1] = y;
338         val.val[2] = z;
339         val.val[3] = w;
340         val.dirty = ShaderParamState::DIRTY;
341     }
342     return val;
343 }
344 
setparamfv(const char * name,int type,int index,const float * v)345 static inline ShaderParamState &setparamfv(const char *name, int type, int index, const float *v)
346 {
347     ShaderParamState &val = (type==SHPARAM_VERTEX ? vertexparamstate[index] : pixelparamstate[index]);
348     val.name = name;
349     if(val.dirty==ShaderParamState::INVALID || memcmp(val.val, v, sizeof(val.val)))
350     {
351         memcpy(val.val, v, sizeof(val.val));
352         val.dirty = ShaderParamState::DIRTY;
353     }
354     return val;
355 }
356 
setenvparamf(const char * name,int type,int index,float x,float y,float z,float w)357 void setenvparamf(const char *name, int type, int index, float x, float y, float z, float w)
358 {
359     ShaderParamState &val = setparamf(name, type, index, x, y, z, w);
360     val.local = false;
361     if(val.dirty==ShaderParamState::DIRTY) dirtyenvparams = true;
362 }
363 
setenvparamfv(const char * name,int type,int index,const float * v)364 void setenvparamfv(const char *name, int type, int index, const float *v)
365 {
366     ShaderParamState &val = setparamfv(name, type, index, v);
367     val.local = false;
368     if(val.dirty==ShaderParamState::DIRTY) dirtyenvparams = true;
369 }
370 
flushenvparamf(const char * name,int type,int index,float x,float y,float z,float w)371 void flushenvparamf(const char *name, int type, int index, float x, float y, float z, float w)
372 {
373     ShaderParamState &val = setparamf(name, type, index, x, y, z, w);
374     val.local = false;
375     flushparam(type, index);
376 }
377 
flushenvparamfv(const char * name,int type,int index,const float * v)378 void flushenvparamfv(const char *name, int type, int index, const float *v)
379 {
380     ShaderParamState &val = setparamfv(name, type, index, v);
381     val.local = false;
382     flushparam(type, index);
383 }
384 
setlocalparamf(const char * name,int type,int index,float x,float y,float z,float w)385 void setlocalparamf(const char *name, int type, int index, float x, float y, float z, float w)
386 {
387     ShaderParamState &val = setparamf(name, type, index, x, y, z, w);
388     val.local = true;
389     flushparam(type, index);
390 }
391 
setlocalparamfv(const char * name,int type,int index,const float * v)392 void setlocalparamfv(const char *name, int type, int index, const float *v)
393 {
394     ShaderParamState &val = setparamfv(name, type, index, v);
395     val.local = true;
396     flushparam(type, index);
397 }
398 
invalidateenvparams(int type,int start,int count)399 void invalidateenvparams(int type, int start, int count)
400 {
401     ShaderParamState *paramstate = type==SHPARAM_VERTEX ? vertexparamstate : pixelparamstate;
402     int end = min(start + count, RESERVEDSHADERPARAMS + MAXSHADERPARAMS);
403     while(start < end)
404     {
405         paramstate[start].dirty = ShaderParamState::INVALID;
406         start++;
407     }
408 }
409 
flushenvparams(Slot * slot)410 void Shader::flushenvparams(Slot *slot)
411 {
412 	if(type & SHADER_GLSLANG)
413 	{
414 		if(!used) allocenvparams(slot);
415 
416 		loopi(numextparams)
417 		{
418 			LocalShaderParamState &ext = extparams[i];
419 			if(ext.index<0) continue;
420 			float *val = ext.type==SHPARAM_VERTEX ? vertexparamstate[ext.index].val : pixelparamstate[ext.index].val;
421 			if(!memcmp(ext.curval, val, sizeof(ext.val))) continue;
422 			memcpy(ext.curval, val, sizeof(ext.val));
423 			glUniform4fv_(ext.loc, 1, ext.curval);
424 		}
425 	}
426     else if(dirtyenvparams)
427     {
428         loopi(RESERVEDSHADERPARAMS)
429         {
430             ShaderParamState &val = vertexparamstate[i];
431             if(val.local || val.dirty!=ShaderParamState::DIRTY) continue;
432             glProgramEnvParameter4fv_(GL_VERTEX_PROGRAM_ARB, i, val.val);
433             val.dirty = ShaderParamState::CLEAN;
434         }
435         loopi(RESERVEDSHADERPARAMS)
436         {
437             ShaderParamState &val = pixelparamstate[i];
438             if(val.local || val.dirty!=ShaderParamState::DIRTY) continue;
439             glProgramEnvParameter4fv_(GL_FRAGMENT_PROGRAM_ARB, i, val.val);
440             val.dirty = ShaderParamState::CLEAN;
441         }
442         dirtyenvparams = false;
443     }
444 	used = true;
445 }
446 
setslotparams(Slot & slot)447 void Shader::setslotparams(Slot &slot)
448 {
449     uint unimask = 0, vertmask = 0, pixmask = 0;
450     loopv(slot.params)
451     {
452         ShaderParam &p = slot.params[i];
453         if(!defaultparams.inrange(p.loc)) continue;
454         LocalShaderParamState &l = defaultparams[p.loc];
455         if(type & SHADER_GLSLANG)
456         {
457             unimask |= p.loc;
458             if(!memcmp(l.curval, p.val, sizeof(l.curval))) continue;
459             memcpy(l.curval, p.val, sizeof(l.curval));
460             glUniform4fv_(l.loc, 1, l.curval);
461         }
462         else if(p.type!=SHPARAM_UNIFORM)
463         {
464             ShaderParamState &val = (l.type==SHPARAM_VERTEX ? vertexparamstate[RESERVEDSHADERPARAMS+l.index] : pixelparamstate[RESERVEDSHADERPARAMS+l.index]);
465             if(l.type==SHPARAM_VERTEX) vertmask |= 1<<l.index;
466             else pixmask |= 1<<l.index;
467             if(memcmp(val.val, p.val, sizeof(val.val))) memcpy(val.val, p.val, sizeof(val.val));
468             else if(val.dirty==ShaderParamState::CLEAN) continue;
469             glProgramEnvParameter4fv_(l.type==SHPARAM_VERTEX ? GL_VERTEX_PROGRAM_ARB : GL_FRAGMENT_PROGRAM_ARB, RESERVEDSHADERPARAMS+l.index, val.val);
470             val.local = true;
471             val.dirty = ShaderParamState::CLEAN;
472         }
473     }
474     loopv(defaultparams)
475     {
476         LocalShaderParamState &l = defaultparams[i];
477         if(type & SHADER_GLSLANG)
478         {
479             if(unimask&(1<<i)) continue;
480             if(!memcmp(l.curval, l.val, sizeof(l.curval))) continue;
481             memcpy(l.curval, l.val, sizeof(l.curval));
482             glUniform4fv_(l.loc, 1, l.curval);
483         }
484         else if(l.type!=SHPARAM_UNIFORM)
485         {
486             if(l.type==SHPARAM_VERTEX)
487             {
488                 if(vertmask & (1<<l.index)) continue;
489             }
490             else if(pixmask & (1<<l.index)) continue;
491             ShaderParamState &val = (l.type==SHPARAM_VERTEX ? vertexparamstate[RESERVEDSHADERPARAMS+l.index] : pixelparamstate[RESERVEDSHADERPARAMS+l.index]);
492             if(memcmp(val.val, l.val, sizeof(val.val))) memcpy(val.val, l.val, sizeof(val.val));
493             else if(val.dirty==ShaderParamState::CLEAN) continue;
494             glProgramEnvParameter4fv_(l.type==SHPARAM_VERTEX ? GL_VERTEX_PROGRAM_ARB : GL_FRAGMENT_PROGRAM_ARB, RESERVEDSHADERPARAMS+l.index, val.val);
495             val.local = true;
496             val.dirty = ShaderParamState::CLEAN;
497         }
498     }
499 }
500 
bindprograms()501 void Shader::bindprograms()
502 {
503     if(this == lastshader || type&(SHADER_DEFERRED|SHADER_INVALID)) return;
504     if(type & SHADER_GLSLANG)
505     {
506         glUseProgramObject_(program);
507     }
508     else
509     {
510         if(lastshader && lastshader->type & SHADER_GLSLANG) glUseProgramObject_(0);
511 
512         glBindProgram_(GL_VERTEX_PROGRAM_ARB,   vs);
513         glBindProgram_(GL_FRAGMENT_PROGRAM_ARB, ps);
514     }
515     lastshader = this;
516 }
517 
// Shader configuration variables; changing useshaders or shaderprecision calls initwarning().
// NOTE(review): VARFN presumably exposes the C++ variable useshaders under the console name "shaders" - confirm against the macro.
VARFN(shaders, useshaders, -1, -1, 1, initwarning("shaders"));
VARF(shaderprecision, 0, 0, 2, initwarning("shader quality"));

// When non-zero, Shader::compile() also reports compile diagnostics for variant shaders.
VAR(dbgshader, 0, 0, 1);
522 
compile()523 bool Shader::compile()
524 {
525     if(type & SHADER_GLSLANG)
526     {
527         if(!vsstr) vsobj = !reusevs || reusevs->type&SHADER_INVALID ? 0 : reusevs->vsobj;
528         else compileglslshader(GL_VERTEX_SHADER_ARB,   vsobj, vsstr, "VS", name, dbgshader || !variantshader);
529         if(!psstr) psobj = !reuseps || reuseps->type&SHADER_INVALID ? 0 : reuseps->psobj;
530         else compileglslshader(GL_FRAGMENT_SHADER_ARB, psobj, psstr, "PS", name, dbgshader || !variantshader);
531         linkglslprogram(*this, !variantshader);
532         return program!=0;
533     }
534     else
535     {
536         if(!vsstr) vs = !reusevs || reusevs->type&SHADER_INVALID ? 0 : reusevs->vs;
537         else if(!compileasmshader(GL_VERTEX_PROGRAM_ARB, vs, vsstr, "VS", name, dbgshader || !variantshader, variantshader!=NULL))
538             native = false;
539         if(!psstr) ps = !reuseps || reuseps->type&SHADER_INVALID ? 0 : reuseps->ps;
540         else if(!compileasmshader(GL_FRAGMENT_PROGRAM_ARB, ps, psstr, "PS", name, dbgshader || !variantshader, variantshader!=NULL))
541             native = false;
542         return vs && ps && (!variantshader || native);
543     }
544 }
545 
cleanup(bool invalid)546 void Shader::cleanup(bool invalid)
547 {
548     detailshader = NULL;
549     used = false;
550     native = true;
551     if(vs) { if(reusevs) glDeletePrograms_(1, &vs); vs = 0; }
552     if(ps) { if(reuseps) glDeletePrograms_(1, &ps); ps = 0; }
553     if(vsobj) { if(reusevs) glDeleteObject_(vsobj); vsobj = 0; }
554     if(psobj) { if(reuseps) glDeleteObject_(psobj); psobj = 0; }
555     if(program) { glDeleteObject_(program); program = 0; }
556     numextparams = 0;
557     DELETEA(extparams);
558     DELETEA(extvertparams);
559     extpixparams = NULL;
560     loopv(defaultparams) memset(defaultparams[i].curval, -1, sizeof(defaultparams[i].curval));
561     if(standard || invalid)
562     {
563         type = SHADER_INVALID;
564         loopi(MAXVARIANTROWS) variants[i].setsizenodelete(0);
565         DELETEA(vsstr);
566         DELETEA(psstr);
567         DELETEA(defer);
568         defaultparams.setsizenodelete(0);
569         altshader = NULL;
570         loopi(MAXSHADERDETAIL) fastshader[i] = this;
571         reusevs = reuseps = NULL;
572     }
573 }
574 
newshader(int type,const char * name,const char * vs,const char * ps,Shader * variant=NULL,int row=0)575 Shader *newshader(int type, const char *name, const char *vs, const char *ps, Shader *variant = NULL, int row = 0)
576 {
577     if(Shader::lastshader)
578     {
579         if(renderpath!=R_FIXEDFUNCTION)
580         {
581             glBindProgram_(GL_VERTEX_PROGRAM_ARB, 0);
582             glBindProgram_(GL_FRAGMENT_PROGRAM_ARB, 0);
583             if(renderpath==R_GLSLANG) glUseProgramObject_(0);
584         }
585         Shader::lastshader = NULL;
586     }
587 
588     Shader *exists = shaders.access(name);
589     char *rname = exists ? exists->name : newstring(name);
590     Shader &s = shaders[rname];
591     s.name = rname;
592     s.vsstr = newstring(vs);
593     s.psstr = newstring(ps);
594     DELETEA(s.defer);
595     s.type = type;
596     s.variantshader = variant;
597     s.standard = standardshader;
598     if(forceshaders) s.forced = true;
599     s.reusevs = s.reuseps = NULL;
600     if(variant)
601     {
602         int row = 0, col = 0;
603         if(!vs[0] || sscanf(vs, "%d , %d", &row, &col) >= 1)
604         {
605             DELETEA(s.vsstr);
606             s.reusevs = !vs[0] ? variant : (variant->variants[row].inrange(col) ? variant->variants[row][col] : NULL);
607         }
608         row = col = 0;
609         if(!ps[0] || sscanf(ps, "%d , %d", &row, &col) >= 1)
610         {
611             DELETEA(s.psstr);
612             s.reuseps = !ps[0] ? variant : (variant->variants[row].inrange(col) ? variant->variants[row][col] : NULL);
613         }
614     }
615     if(variant) loopv(variant->defaultparams) s.defaultparams.add(variant->defaultparams[i]);
616     else loopv(curparams) s.defaultparams.add(curparams[i]);
617     if(renderpath!=R_FIXEDFUNCTION && !s.compile())
618     {
619         s.cleanup(true);
620         if(variant) shaders.remove(rname);
621         return NULL;
622     }
623     if(variant) variant->variants[row].add(&s);
624     s.fixdetailshader();
625     return &s;
626 }
627 
// Scans ARB vertex program text for "result.texcoord[N" references and
// returns a bit mask with bit N set for every N in 0..15 that occurs.
// Indices outside that range are ignored.
static uint findusedtexcoords(const char *str)
{
    static const char marker[] = "result.texcoord[";
    uint mask = 0;
    while(const char *hit = strstr(str, marker))
    {
        // continue scanning right after the parsed index
        char *after = NULL;
        int n = strtol(hit + (sizeof(marker) - 1), &after, 10);
        str = after;
        if(n>=0 && n<16) mask |= 1<<n;
    }
    return mask;
}
642 
findunusedtexcoordcomponent(const char * str,int & texcoord,int & component)643 static bool findunusedtexcoordcomponent(const char *str, int &texcoord, int &component)
644 {
645     uchar texcoords[16];
646     memset(texcoords, 0, sizeof(texcoords));
647     for(;;)
648     {
649         const char *tc = strstr(str, "result.texcoord[");
650         if(!tc) break;
651         tc += strlen("result.texcoord[");
652         int n = strtol(tc, (char **)&str, 10);
653         if(n<0 || n>=(int)sizeof(texcoords)) continue;
654         while(*str && *str!=']') str++;
655         if(*str==']')
656         {
657             if(*++str!='.') { texcoords[n] = 0xF; continue; }
658             for(;;)
659             {
660                 switch(*++str)
661                 {
662                     case 'r': case 'x': texcoords[n] |= 1; continue;
663                     case 'g': case 'y': texcoords[n] |= 2; continue;
664                     case 'b': case 'z': texcoords[n] |= 4; continue;
665                     case 'a': case 'w': texcoords[n] |= 8; continue;
666                 }
667                 break;
668             }
669         }
670     }
671     loopi(sizeof(texcoords)) if(texcoords[i]>0 && texcoords[i]<0xF)
672     {
673         loopk(4) if(!(texcoords[i]&(1<<k))) { texcoord = i; component = k; return true; }
674     }
675     return false;
676 }
677 
// Appends the vertex program text [start, end) to vsbuf.
// When cond is true, the "result.fogcoord" reference located at fogcoord is replaced by
// "result.texcoord[<fogtc>].<c>", where <c> is 'w' for fogcomp==3 and 'x'+fogcomp otherwise;
// if the reference is followed by '.', that '.' and the next character are dropped as well.
// When cond is false the text is copied unchanged.
// Expands to a complete if/else statement (already terminated by ';') and declares locals inside the if block.
#define EMUFOGVS(cond, vsbuf, start, end, fogcoord, fogtc, fogcomp) \
    if(cond) \
    { \
        vsbuf.put(start, fogcoord-start); \
        const char *afterfogcoord = fogcoord + strlen("result.fogcoord"); \
        if(*afterfogcoord=='.') afterfogcoord += 2; \
        defformatstring(repfogcoord)("result.texcoord[%d].%c", fogtc, fogcomp==3 ? 'w' : 'x'+fogcomp); \
        vsbuf.put(repfogcoord, strlen(repfogcoord)); \
        vsbuf.put(afterfogcoord, end-afterfogcoord); \
    } \
    else vsbuf.put(start, end-start);
689 
// When cond is true, rewrites the fragment program held in psbuf in place:
//  - "OPTION ARB_fog_linear;" is overwritten with an equally long "TEMP emufogcolor;" declaration,
//  - every "result.color" that is not followed by ".a" or ".w" is overwritten with " emufogcolor",
//  - the first "fragment.fogcoord.x" is replaced by "fragment.texcoord[<fogtc>].<c>"
//    (<c> is 'w' for fogcomp==3 and 'x'+fogcomp otherwise),
//  - the program is truncated at the first "END" and instructions are appended that blend
//    emufogcolor with state.fog.color into result.color.rgb, followed by a new "END".
// The literal 12 is the length of "result.color" and 19 the length of "fragment.fogcoord.x".
// Expands to an if statement without else; declares locals inside the if block.
#define EMUFOGPS(cond, psbuf, fogtc, fogcomp) \
    if(cond) \
    { \
        char *fogoption = strstr(psbuf.getbuf(), "OPTION ARB_fog_linear;"); \
        /*                    OPTION ARB_fog_linear; */ \
        const char *tmpdef = "TEMP emufogcolor;     "; \
        if(fogoption) while(*tmpdef) *fogoption++ = *tmpdef++; \
        /*                    result.color */\
        const char *tmpuse = " emufogcolor"; \
        char *str = psbuf.getbuf(); \
        for(;;) \
        { \
            str = strstr(str, "result.color"); \
            if(!str) break; \
            if(str[12]!='.' || (str[13]!='a' && str[13]!='w')) memcpy(str, tmpuse, strlen(tmpuse)); \
            str += 12; \
        } \
        defformatstring(fogtcstr)("fragment.texcoord[%d].%c", fogtc, fogcomp==3 ? 'w' : 'x'+fogcomp); \
        str = strstr(psbuf.getbuf(), "fragment.fogcoord.x"); \
        if(str) \
        { \
            int fogtclen = strlen(fogtcstr); \
            memcpy(str, fogtcstr, 19); \
            psbuf.insert(&str[19] - psbuf.getbuf(), &fogtcstr[19], fogtclen-19); \
        } \
        char *end = strstr(psbuf.getbuf(), "END"); \
        if(end) psbuf.setsizenodelete(end - psbuf.getbuf()); \
        defformatstring(calcfog)( \
            "TEMP emufog;\n" \
            "SUB emufog, state.fog.params.z, %s;\n" \
            "MUL_SAT emufog, emufog, state.fog.params.w;\n" \
            "LRP result.color.rgb, emufog, emufogcolor, state.fog.color;\n" \
            "END\n", \
            fogtcstr); \
        psbuf.put(calcfog, strlen(calcfog)+1); \
    }
726 
// Number of texture coordinates the variant generators keep free: reserveshadowmaptc applies to odd
// variant rows, reservedynlighttc to even rows (see genwatervariant()).
VAR(reserveshadowmaptc, 1, 0, 0);
VAR(reservedynlighttc, 1, 0, 0);
// When non-zero, gendynlightvariant() limits itself to a single dynamic light.
VAR(minimizedynlighttcusage, 1, 0, 0);
730 
gengenericvariant(Shader & s,const char * sname,const char * vs,const char * ps,int row)731 static void gengenericvariant(Shader &s, const char *sname, const char *vs, const char *ps, int row)
732 {
733     bool vschanged = false, pschanged = false;
734     vector<char> vsv, psv;
735     vsv.put(vs, strlen(vs)+1);
736     psv.put(ps, strlen(ps)+1);
737 
738     int len = strlen("#pragma CUBE2_variant");
739     for(char *vspragma = vsv.getbuf();; vschanged = true)
740     {
741         vspragma = strstr(vspragma, "#pragma CUBE2_variant");
742         if(!vspragma) break;
743         memset(vspragma, ' ', len);
744     }
745     for(char *pspragma = psv.getbuf();; pschanged = true)
746     {
747         pspragma = strstr(pspragma, "#pragma CUBE2_variant");
748         if(!pspragma) break;
749         memset(pspragma, ' ', len);
750     }
751     defformatstring(varname)("<variant:%d,%d>%s", s.variants[row].length(), row, sname);
752     defformatstring(reuse)("%d", row);
753     newshader(s.type, varname, vschanged ? vsv.getbuf() : reuse, pschanged ? psv.getbuf() : reuse, &s, row);
754 }
755 
genwatervariant(Shader & s,const char * sname,vector<char> & vs,vector<char> & ps,int row)756 static bool genwatervariant(Shader &s, const char *sname, vector<char> &vs, vector<char> &ps, int row)
757 {
758     char *vspragma = strstr(vs.getbuf(), "#pragma CUBE2_water");
759     if(!vspragma) return false;
760     char *pspragma = strstr(ps.getbuf(), "#pragma CUBE2_water");
761     if(!pspragma) return false;
762     vspragma += strcspn(vspragma, "\n");
763     if(*vspragma) vspragma++;
764     pspragma += strcspn(pspragma, "\n");
765     if(*pspragma) pspragma++;
766     if(s.type & SHADER_GLSLANG)
767     {
768         const char *fadedef = "waterfade = gl_Vertex.z*fogselect.y + fogselect.z;\n";
769         vs.insert(vspragma-vs.getbuf(), fadedef, strlen(fadedef));
770         const char *fadeuse = "gl_FragColor.a = waterfade;\n";
771         ps.insert(pspragma-ps.getbuf(), fadeuse, strlen(fadeuse));
772         const char *fadedecl = "varying float waterfade;\n";
773         vs.insert(0, fadedecl, strlen(fadedecl));
774         ps.insert(0, fadedecl, strlen(fadedecl));
775     }
776     else
777     {
778         int fadetc = -1, fadecomp = -1;
779         if(!findunusedtexcoordcomponent(vs.getbuf(), fadetc, fadecomp))
780         {
781             uint usedtc = findusedtexcoords(vs.getbuf());
782             GLint maxtc = 0;
783             glGetIntegerv(GL_MAX_TEXTURE_COORDS_ARB, &maxtc);
784             int reservetc = row%2 ? reserveshadowmaptc : reservedynlighttc;
785             loopi(maxtc-reservetc) if(!(usedtc&(1<<i))) { fadetc = i; fadecomp = 3; break; }
786         }
787         if(fadetc>=0)
788         {
789             defformatstring(fadedef)("MAD result.texcoord[%d].%c, vertex.position.z, program.env[8].y, program.env[8].z;\n",
790                                 fadetc, fadecomp==3 ? 'w' : 'x'+fadecomp);
791             vs.insert(vspragma-vs.getbuf(), fadedef, strlen(fadedef));
792             defformatstring(fadeuse)("MOV result.color.a, fragment.texcoord[%d].%c;\n",
793                                 fadetc, fadecomp==3 ? 'w' : 'x'+fadecomp);
794             ps.insert(pspragma-ps.getbuf(), fadeuse, strlen(fadeuse));
795         }
796         else // fallback - use fog value, works under water but not above
797         {
798             const char *fogfade = "MAD result.color.a, fragment.fogcoord.x, 0.25, 0.5;\n";
799             ps.insert(pspragma-ps.getbuf(), fogfade, strlen(fogfade));
800         }
801     }
802     defformatstring(name)("<water>%s", sname);
803     Shader *variant = newshader(s.type, name, vs.getbuf(), ps.getbuf(), &s, row);
804     return variant!=NULL;
805 }
806 
genwatervariant(Shader & s,const char * sname,const char * vs,const char * ps,int row=2)807 static void genwatervariant(Shader &s, const char *sname, const char *vs, const char *ps, int row = 2)
808 {
809     vector<char> vsw, psw;
810     vsw.put(vs, strlen(vs)+1);
811     psw.put(ps, strlen(ps)+1);
812     genwatervariant(s, sname, vsw, psw, row);
813 }
814 
gendynlightvariant(Shader & s,const char * sname,const char * vs,const char * ps,int row=0)815 static void gendynlightvariant(Shader &s, const char *sname, const char *vs, const char *ps, int row = 0)
816 {
817 	int numlights = 0, lights[MAXDYNLIGHTS];
818     int emufogtc = -1, emufogcomp = -1;
819     const char *emufogcoord = NULL;
820     if(s.type & SHADER_GLSLANG) numlights = minimizedynlighttcusage ? 1 : MAXDYNLIGHTS;
821     else
822     {
823         uint usedtc = findusedtexcoords(vs);
824         GLint maxtc = 0;
825         glGetIntegerv(GL_MAX_TEXTURE_COORDS_ARB, &maxtc);
826         int reservetc = row%2 ? reserveshadowmaptc : reservedynlighttc;
827         if(maxtc-reservetc<0) return;
828         int limit = minimizedynlighttcusage ? 1 : MAXDYNLIGHTS;
829         loopi(maxtc-reservetc) if(!(usedtc&(1<<i)))
830         {
831             lights[numlights++] = i;
832             if(numlights>=limit) break;
833         }
834         extern int emulatefog;
835         if(emulatefog && reservetc>0 && numlights+1<limit && !(usedtc&(1<<(maxtc-reservetc))) && strstr(ps, "OPTION ARB_fog_linear;"))
836         {
837             emufogcoord = strstr(vs, "result.fogcoord");
838             if(emufogcoord)
839             {
840                 if(!findunusedtexcoordcomponent(vs, emufogtc, emufogcomp))
841                 {
842                     emufogtc = maxtc-reservetc;
843                     emufogcomp = 3;
844                 }
845                 lights[numlights++] = maxtc-reservetc;
846             }
847         }
848 		if(!numlights) return;
849 	}
850 
851 	const char *vspragma = strstr(vs, "#pragma CUBE2_dynlight"), *pspragma = strstr(ps, "#pragma CUBE2_dynlight");
852 	string pslight;
853 	vspragma += strcspn(vspragma, "\n");
854 	if(*vspragma) vspragma++;
855 
856 	if(sscanf(pspragma, "#pragma CUBE2_dynlight %s", pslight)!=1) return;
857 
858 	pspragma += strcspn(pspragma, "\n");
859 	if(*pspragma) pspragma++;
860 
861 	vector<char> vsdl, psdl;
862 	loopi(MAXDYNLIGHTS)
863 	{
864 		vsdl.setsizenodelete(0);
865 		psdl.setsizenodelete(0);
866 
867         if(s.type & SHADER_GLSLANG)
868         {
869             loopk(i+1)
870             {
871                 defformatstring(pos)("%sdynlight%dpos%s",
872                     !k || k==numlights ? "uniform vec4 " : " ",
873                     k,
874                     k==i || k+1==numlights ? ";\n" : ",");
875                 if(k<numlights) vsdl.put(pos, strlen(pos));
876                 else psdl.put(pos, strlen(pos));
877             }
878             loopk(i+1)
879             {
880                 defformatstring(color)("%sdynlight%dcolor%s", !k ? "uniform vec4 " : " ", k, k==i ? ";\n" : ",");
881                 psdl.put(color, strlen(color));
882             }
883             loopk(min(i+1, numlights))
884             {
885                 defformatstring(dir)("%sdynlight%ddir%s", !k ? "varying vec3 " : " ", k, k==i || k+1==numlights ? ";\n" : ",");
886                 vsdl.put(dir, strlen(dir));
887                 psdl.put(dir, strlen(dir));
888             }
889         }
890 
891         EMUFOGVS(emufogcoord && i+1==numlights && emufogcoord < vspragma, vsdl, vs, vspragma, emufogcoord, emufogtc, emufogcomp);
892 		psdl.put(ps, pspragma-ps);
893 
894         loopk(i+1)
895         {
896             extern int ati_dph_bug;
897             string tc, dl;
898             if(s.type & SHADER_GLSLANG) formatstring(tc)(
899                 k<numlights ?
900                     "dynlight%ddir = gl_Vertex.xyz*dynlight%dpos.w + dynlight%dpos.xyz;\n" :
901                     "vec3 dynlight%ddir = dynlight0dir*dynlight%dpos.w + dynlight%dpos.xyz;\n",
902                 k, k, k);
903             else if(k>=numlights) formatstring(tc)(
904                 "%s"
905                 "MAD dynlightdir.xyz, fragment.texcoord[%d], program.env[%d].w, program.env[%d];\n",
906                 k==numlights ? "TEMP dynlightdir;\n" : "",
907                 lights[0], k-1, k-1);
908             else if(ati_dph_bug || lights[k]==emufogtc) formatstring(tc)(
909                 "MAD result.texcoord[%d].xyz, vertex.position, program.env[%d].w, program.env[%d];\n",
910                 lights[k], 10+k, 10+k);
911             else formatstring(tc)(
912                 "MAD result.texcoord[%d].xyz, vertex.position, program.env[%d].w, program.env[%d];\n"
913                 "MOV result.texcoord[%d].w, 1;\n",
914                 lights[k], 10+k, 10+k, lights[k]);
915             if(k < numlights) vsdl.put(tc, strlen(tc));
916             else psdl.put(tc, strlen(tc));
917 
918             if(s.type & SHADER_GLSLANG) formatstring(dl)(
919                 "%s.rgb += dynlight%dcolor.rgb * (1.0 - clamp(dot(dynlight%ddir, dynlight%ddir), 0.0, 1.0));\n",
920                 pslight, k, k, k);
921             else if(k>=numlights) formatstring(dl)(
922                 "DP3_SAT dynlight.x, dynlightdir, dynlightdir;\n"
923                 "SUB dynlight.x, 1, dynlight.x;\n"
924                 "MAD %s.rgb, program.env[%d], dynlight.x, %s;\n",
925                 pslight, 10+k, pslight);
926             else if(ati_dph_bug || lights[k]==emufogtc) formatstring(dl)(
927                 "%s"
928                 "DP3_SAT dynlight.x, fragment.texcoord[%d], fragment.texcoord[%d];\n"
929                 "SUB dynlight.x, 1, dynlight.x;\n"
930                 "MAD %s.rgb, program.env[%d], dynlight.x, %s;\n",
931                 !k ? "TEMP dynlight;\n" : "",
932                 lights[k], lights[k],
933                 pslight, 10+k, pslight);
934             else formatstring(dl)(
935                 "%s"
936                 "DPH_SAT dynlight.x, -fragment.texcoord[%d], fragment.texcoord[%d];\n"
937                 "MAD %s.rgb, program.env[%d], dynlight.x, %s;\n",
938                 !k ? "TEMP dynlight;\n" : "",
939                 lights[k], lights[k],
940                 pslight, 10+k, pslight);
941             psdl.put(dl, strlen(dl));
942         }
943 
944         EMUFOGVS(emufogcoord && i+1==numlights && emufogcoord >= vspragma, vsdl, vspragma, vspragma+strlen(vspragma)+1, emufogcoord, emufogtc, emufogcomp);
945 		psdl.put(pspragma, strlen(pspragma)+1);
946 
947         EMUFOGPS(emufogcoord && i+1==numlights, psdl, emufogtc, emufogcomp);
948 
949         defformatstring(name)("<dynlight %d>%s", i+1, sname);
950         Shader *variant = newshader(s.type, name, vsdl.getbuf(), psdl.getbuf(), &s, row);
951         if(!variant) return;
952         if(row < 4) genwatervariant(s, name, vsdl, psdl, row+2);
953 	}
954 }
955 
genshadowmapvariant(Shader & s,const char * sname,const char * vs,const char * ps,int row=1)956 static void genshadowmapvariant(Shader &s, const char *sname, const char *vs, const char *ps, int row = 1)
957 {
958     int smtc = -1, emufogtc = -1, emufogcomp = -1;
959     const char *emufogcoord = NULL;
960     if(!(s.type & SHADER_GLSLANG))
961     {
962         uint usedtc = findusedtexcoords(vs);
963         GLint maxtc = 0;
964         glGetIntegerv(GL_MAX_TEXTURE_COORDS_ARB, &maxtc);
965         if(maxtc-reserveshadowmaptc<0) return;
966         loopi(maxtc-reserveshadowmaptc) if(!(usedtc&(1<<i))) { smtc = i; break; }
967         extern int emulatefog;
968         if(smtc<0 && emulatefog && reserveshadowmaptc>0 && !(usedtc&(1<<(maxtc-reserveshadowmaptc))) && strstr(ps, "OPTION ARB_fog_linear;"))
969         {
970             emufogcoord = strstr(vs, "result.fogcoord");
971             if(!emufogcoord || !findunusedtexcoordcomponent(vs, emufogtc, emufogcomp)) return;
972             smtc = maxtc-reserveshadowmaptc;
973         }
974         if(smtc<0) return;
975     }
976 
977     const char *vspragma = strstr(vs, "#pragma CUBE2_shadowmap"), *pspragma = strstr(ps, "#pragma CUBE2_shadowmap");
978     string pslight;
979     vspragma += strcspn(vspragma, "\n");
980     if(*vspragma) vspragma++;
981 
982     if(sscanf(pspragma, "#pragma CUBE2_shadowmap %s", pslight)!=1) return;
983 
984     pspragma += strcspn(pspragma, "\n");
985     if(*pspragma) pspragma++;
986 
987     vector<char> vssm, pssm;
988 
989     if(s.type & SHADER_GLSLANG)
990     {
991         const char *tc = "varying vec3 shadowmaptc;\n";
992         vssm.put(tc, strlen(tc));
993         pssm.put(tc, strlen(tc));
994         const char *smtex =
995             "uniform sampler2D shadowmap;\n"
996             "uniform vec4 shadowmapambient;\n";
997         pssm.put(smtex, strlen(smtex));
998         if(!strstr(ps, "ambient"))
999         {
1000             const char *amb = "uniform vec4 ambient;\n";
1001             pssm.put(amb, strlen(amb));
1002         }
1003     }
1004 
1005     EMUFOGVS(emufogcoord && emufogcoord < vspragma, vssm, vs, vspragma, emufogcoord, emufogtc, emufogcomp);
1006     pssm.put(ps, pspragma-ps);
1007 
1008     extern int smoothshadowmappeel;
1009     if(s.type & SHADER_GLSLANG)
1010     {
1011         const char *tc =
1012             "shadowmaptc = vec3(gl_TextureMatrix[2] * gl_Vertex);\n";
1013         vssm.put(tc, strlen(tc));
1014         const char *sm =
1015             smoothshadowmappeel ?
1016                 "vec4 smvals = texture2D(shadowmap, shadowmaptc.xy);\n"
1017                 "vec2 smdiff = clamp(smvals.xz - shadowmaptc.zz*smvals.y, 0.0, 1.0);\n"
1018                 "float shadowed = clamp((smdiff.x > 0.0 ? smvals.w : 0.0) - 8.0*smdiff.y, 0.0, 1.0);\n" :
1019 
1020                 "vec4 smvals = texture2D(shadowmap, shadowmaptc.xy);\n"
1021                 "float smtest = shadowmaptc.z*smvals.y;\n"
1022                 "float shadowed = smtest < smvals.x && smtest > smvals.z ? smvals.w : 0.0;\n";
1023         pssm.put(sm, strlen(sm));
1024         defformatstring(smlight)(
1025             "%s.rgb -= shadowed*clamp(%s.rgb - shadowmapambient.rgb, 0.0, 1.0);\n",
1026             pslight, pslight, pslight);
1027         pssm.put(smlight, strlen(smlight));
1028     }
1029     else
1030     {
1031         defformatstring(tc)(
1032             "DP4 result.texcoord[%d].x, state.matrix.texture[2].row[0], vertex.position;\n"
1033             "DP4 result.texcoord[%d].y, state.matrix.texture[2].row[1], vertex.position;\n"
1034             "DP4 result.texcoord[%d].z, state.matrix.texture[2].row[2], vertex.position;\n",
1035             smtc, smtc, smtc);
1036         vssm.put(tc, strlen(tc));
1037 
1038         defformatstring(sm)(
1039             smoothshadowmappeel ?
1040                 "TEMP smvals, smdiff, smambient;\n"
1041                 "TEX smvals, fragment.texcoord[%d], texture[7], 2D;\n"
1042                 "MAD_SAT smdiff.xz, -fragment.texcoord[%d].z, smvals.y, smvals;\n"
1043                 "CMP smvals.w, -smdiff.x, smvals.w, 0;\n"
1044                 "MAD_SAT smvals.w, -8, smdiff.z, smvals.w;\n" :
1045 
1046                 "TEMP smvals, smtest, smambient;\n"
1047                 "TEX smvals, fragment.texcoord[%d], texture[7], 2D;\n"
1048                 "MUL smtest.z, fragment.texcoord[%d].z, smvals.y;\n"
1049                 "SLT smtest.xz, smtest.z, smvals;\n"
1050                 "MAD_SAT smvals.w, smvals.w, smtest.x, -smtest.z;\n",
1051             smtc, smtc);
1052         pssm.put(sm, strlen(sm));
1053         formatstring(sm)(
1054             "SUB_SAT smambient.rgb, %s, program.env[7];\n"
1055             "MAD %s.rgb, smvals.w, -smambient, %s;\n",
1056             pslight, pslight, pslight);
1057         pssm.put(sm, strlen(sm));
1058     }
1059 
1060     if(!hasFBO) for(char *s = pssm.getbuf();;)
1061     {
1062         s = strstr(s, "smvals.w");
1063         if(!s) break;
1064         s[7] = 'y';
1065         s += 8;
1066     }
1067 
1068     EMUFOGVS(emufogcoord && emufogcoord >= vspragma, vssm, vspragma, vspragma+strlen(vspragma)+1, emufogcoord, emufogtc, emufogcomp);
1069     pssm.put(pspragma, strlen(pspragma)+1);
1070 
1071     EMUFOGPS(emufogcoord, pssm, emufogtc, emufogcomp);
1072 
1073     defformatstring(name)("<shadowmap>%s", sname);
1074     Shader *variant = newshader(s.type, name, vssm.getbuf(), pssm.getbuf(), &s, row);
1075     if(!variant) return;
1076     genwatervariant(s, name, vssm.getbuf(), pssm.getbuf(), row+2);
1077 
1078     if(strstr(vs, "#pragma CUBE2_dynlight")) gendynlightvariant(s, name, vssm.getbuf(), pssm.getbuf(), row);
1079 }
1080 
// when zero, defershader() executes the shader definition immediately instead of storing it
VAR(defershaders, 0, 1, 1);
1082 
defershader(int * type,const char * name,const char * contents)1083 void defershader(int *type, const char *name, const char *contents)
1084 {
1085     Shader *exists = shaders.access(name);
1086     if(exists && !(exists->type&SHADER_INVALID)) return;
1087     if(!defershaders) { execute(contents); return; }
1088     char *rname = exists ? exists->name : newstring(name);
1089     Shader &s = shaders[rname];
1090     s.name = rname;
1091     DELETEA(s.defer);
1092     s.defer = newstring(contents);
1093     s.type = SHADER_DEFERRED | *type;
1094     s.standard = standardshader;
1095 }
1096 
useshader(Shader * s)1097 void useshader(Shader *s)
1098 {
1099     if(!(s->type&SHADER_DEFERRED) || !s->defer) return;
1100 
1101     char *defer = s->defer;
1102     s->defer = NULL;
1103     bool wasstandard = standardshader, wasforcing = forceshaders, waspersisting = persistidents;
1104     standardshader = s->standard;
1105     forceshaders = false;
1106     persistidents = false;
1107     curparams.setsize(0);
1108     execute(defer);
1109     persistidents = waspersisting;
1110     forceshaders = wasforcing;
1111     standardshader = wasstandard;
1112     delete[] defer;
1113 
1114     if(s->type&SHADER_DEFERRED)
1115     {
1116         DELETEA(s->defer);
1117         s->type = SHADER_INVALID;
1118     }
1119 }
1120 
fixshaderdetail()1121 void fixshaderdetail()
1122 {
1123     // must null out separately because fixdetailshader can recursively set it
1124     enumerate(shaders, Shader, s, { if(!s.forced) s.detailshader = NULL; });
1125     enumerate(shaders, Shader, s, { if(s.forced) s.fixdetailshader(); });
1126     linkslotshaders();
1127 }
1128 
// both variables are read by Shader::fixdetailshader(); each passes fixshaderdetail()
// as its final macro argument (presumably run when the value changes - confirm against VARF)
VARF(nativeshaders, 0, 1, 1, fixshaderdetail());
VARFP(shaderdetail, 0, MAXSHADERDETAIL, MAXSHADERDETAIL, fixshaderdetail());
1131 
fixdetailshader(bool force,bool recurse)1132 void Shader::fixdetailshader(bool force, bool recurse)
1133 {
1134     Shader *alt = this;
1135     detailshader = NULL;
1136     do
1137     {
1138         Shader *cur = shaderdetail < MAXSHADERDETAIL ? alt->fastshader[shaderdetail] : alt;
1139         if(cur->type&SHADER_DEFERRED && force) useshader(cur);
1140         if(!(cur->type&SHADER_INVALID))
1141         {
1142             if(cur->type&SHADER_DEFERRED) break;
1143             detailshader = cur;
1144             if(cur->native || !nativeshaders) break;
1145         }
1146         alt = alt->altshader;
1147     } while(alt && alt!=this);
1148 
1149     if(recurse && detailshader) loopi(MAXVARIANTROWS) loopvj(detailshader->variants[i]) detailshader->variants[i][j]->fixdetailshader(force, false);
1150 }
1151 
useshaderbyname(const char * name)1152 Shader *useshaderbyname(const char *name)
1153 {
1154     Shader *s = shaders.access(name);
1155     if(!s) return NULL;
1156     if(!s->detailshader) s->fixdetailshader();
1157     s->forced = true;
1158     return s;
1159 }
1160 
shader(int * type,char * name,char * vs,char * ps)1161 void shader(int *type, char *name, char *vs, char *ps)
1162 {
1163     if(lookupshaderbyname(name)) return;
1164 
1165     if((renderpath!=R_GLSLANG && *type & SHADER_GLSLANG) ||
1166        (!hasCM && strstr(ps, *type & SHADER_GLSLANG ? "textureCube" : "CUBE;")) ||
1167        (!hasTR && strstr(ps, *type & SHADER_GLSLANG ? "texture2DRect" : "RECT;")))
1168     {
1169         loopv(curparams)
1170         {
1171             if(curparams[i].name) delete[] curparams[i].name;
1172         }
1173         curparams.setsize(0);
1174         return;
1175     }
1176 
1177     extern int mesa_program_bug;
1178     if(renderpath!=R_FIXEDFUNCTION)
1179     {
1180         defformatstring(info)("shader %s", name);
1181         progress(loadprogress, info);
1182         if(mesa_program_bug && initshaders)
1183         {
1184             glEnable(GL_VERTEX_PROGRAM_ARB);
1185             glEnable(GL_FRAGMENT_PROGRAM_ARB);
1186         }
1187     }
1188     Shader *s = newshader(*type, name, vs, ps);
1189     if(s && renderpath!=R_FIXEDFUNCTION)
1190     {
1191         // '#' is a comment in vertex/fragment programs, while '#pragma' allows an escape for GLSL, so can handle both at once
1192         if(strstr(vs, "#pragma CUBE2_water")) genwatervariant(*s, s->name, vs, ps);
1193         if(strstr(vs, "#pragma CUBE2_shadowmap")) genshadowmapvariant(*s, s->name, vs, ps);
1194         if(strstr(vs, "#pragma CUBE2_dynlight")) gendynlightvariant(*s, s->name, vs, ps);
1195     }
1196     if(renderpath!=R_FIXEDFUNCTION && mesa_program_bug && initshaders)
1197     {
1198         glDisable(GL_VERTEX_PROGRAM_ARB);
1199         glDisable(GL_FRAGMENT_PROGRAM_ARB);
1200     }
1201     curparams.setsize(0);
1202 }
1203 
variantshader(int * type,char * name,int * row,char * vs,char * ps)1204 void variantshader(int *type, char *name, int *row, char *vs, char *ps)
1205 {
1206     if(*row < 0)
1207     {
1208         shader(type, name, vs, ps);
1209         return;
1210     }
1211 
1212     if(renderpath==R_FIXEDFUNCTION && standardshader) return;
1213 
1214     Shader *s = lookupshaderbyname(name);
1215     if(!s) return;
1216 
1217     defformatstring(varname)("<variant:%d,%d>%s", s->variants[*row].length(), *row, name);
1218     //defformatstring(info)("shader %s", varname);
1219     //progress(loadprogress, info);
1220     extern int mesa_program_bug;
1221     if(renderpath!=R_FIXEDFUNCTION && mesa_program_bug && initshaders)
1222     {
1223         glEnable(GL_VERTEX_PROGRAM_ARB);
1224         glEnable(GL_FRAGMENT_PROGRAM_ARB);
1225     }
1226     Shader *v = newshader(*type, varname, vs, ps, s, *row);
1227     if(v && renderpath!=R_FIXEDFUNCTION)
1228     {
1229         // '#' is a comment in vertex/fragment programs, while '#pragma' allows an escape for GLSL, so can handle both at once
1230         if(strstr(vs, "#pragma CUBE2_dynlight")) gendynlightvariant(*s, varname, vs, ps, *row);
1231         if(strstr(vs, "#pragma CUBE2_variant")) gengenericvariant(*s, varname, vs, ps, *row);
1232     }
1233     if(renderpath!=R_FIXEDFUNCTION && mesa_program_bug && initshaders)
1234     {
1235         glDisable(GL_VERTEX_PROGRAM_ARB);
1236         glDisable(GL_FRAGMENT_PROGRAM_ARB);
1237     }
1238 }
1239 
setshader(char * name)1240 void setshader(char *name)
1241 {
1242     curparams.setsize(0);
1243     Shader *s = shaders.access(name);
1244 	if(!s)
1245 	{
1246 		if(renderpath!=R_FIXEDFUNCTION) conoutf("\frno such shader: %s", name);
1247 	}
1248 	else curshader = s;
1249 }
1250 
findshaderparam(Slot & s,const char * name,int type,int index)1251 ShaderParam *findshaderparam(Slot &s, const char *name, int type, int index)
1252 {
1253 	loopv(s.params)
1254 	{
1255 		ShaderParam &param = s.params[i];
1256 		if((name && param.name && !strcmp(name, param.name)) || (param.type==type && param.index==index)) return &param;
1257 	}
1258     if(!s.shader->detailshader) return NULL;
1259     loopv(s.shader->detailshader->defaultparams)
1260     {
1261         ShaderParam &param = s.shader->detailshader->defaultparams[i];
1262         if((name && param.name && !strcmp(name, param.name)) || (param.type==type && param.index==index)) return &param;
1263     }
1264 	return NULL;
1265 }
1266 
resetslotshader()1267 void resetslotshader()
1268 {
1269     curshader = NULL;
1270     curparams.setsize(0);
1271 }
1272 
setslotshader(Slot & s)1273 void setslotshader(Slot &s)
1274 {
1275     s.shader = curshader;
1276     if(!s.shader)
1277     {
1278         s.shader = stdworldshader;
1279         return;
1280     }
1281     loopv(curparams) s.params.add(curparams[i]);
1282 }
1283 
linkslotshader(Slot & s,bool load)1284 void linkslotshader(Slot &s, bool load)
1285 {
1286     if(!s.shader) return;
1287 
1288     if(load && !s.shader->detailshader) s.shader->fixdetailshader();
1289 
1290     Shader *sh = s.shader->detailshader;
1291     if(!sh)
1292     {
1293         if(load)
1294         {
1295             loopv(s.params) s.params[i].loc = -1;
1296             return;
1297         }
1298         sh = s.shader;
1299     }
1300 
1301     loopv(s.params)
1302     {
1303         int loc = -1;
1304         ShaderParam &param = s.params[i];
1305         loopv(sh->defaultparams)
1306         {
1307             ShaderParam &dparam = sh->defaultparams[i];
1308             if(param.name ? dparam.name==param.name : dparam.type==param.type && dparam.index==param.index)
1309             {
1310                 if(memcmp(param.val, dparam.val, sizeof(param.val))) loc = i;
1311                 break;
1312             }
1313         }
1314         param.loc = loc;
1315     }
1316 
1317     if(strstr(sh->name, "glowworld"))
1318     {
1319         ShaderParam *cparam = findshaderparam(s, "glowcolor", SHPARAM_PIXEL, 0);
1320         if(!cparam) cparam = findshaderparam(s, "glowcolor", SHPARAM_VERTEX, 0);
1321         if(cparam) loopk(3) s.glowcolor[k] = cparam->val[k];
1322         else s.glowcolor = vec(1, 1, 1);
1323         if(strstr(sh->name, "pulse"))
1324         {
1325             ShaderParam *pulseparam, *speedparam;
1326             if(strstr(sh->name, "bump"))
1327             {
1328                 pulseparam = findshaderparam(s, "pulseglowcolor", SHPARAM_PIXEL, 5);
1329                 speedparam = findshaderparam(s, "pulseglowspeed", SHPARAM_VERTEX, 4);
1330             }
1331             else
1332             {
1333                 pulseparam = findshaderparam(s, "pulseglowcolor", SHPARAM_VERTEX, 2);
1334                 speedparam = findshaderparam(s, "pulseglowspeed", SHPARAM_VERTEX, 1);
1335             }
1336             if(pulseparam) loopk(3) s.pulseglowcolor[k] = pulseparam->val[k];
1337             else s.pulseglowcolor = vec(0, 0, 0);
1338             if(speedparam) s.pulseglowspeed = speedparam->val[0]/1000.0f;
1339             else s.pulseglowspeed = 1;
1340         }
1341     }
1342     else if(!strcmp(sh->name, "colorworld"))
1343     {
1344         ShaderParam *cparam = findshaderparam(s, "colorscale", SHPARAM_PIXEL, 0);
1345         if(cparam && (cparam->val[0]!=1 || cparam->val[1]!=1 || cparam->val[2]!=1) && s.sts.length()>=1 && !strstr(s.sts[0].name, "<ffcolor:"))
1346         {
1347             defformatstring(colorname)("<ffcolor:%f/%f/%f>%s", cparam->val[0], cparam->val[1], cparam->val[2], s.sts[0].name);
1348             copystring(s.sts[0].name, colorname);
1349         }
1350     }
1351 }
1352 
altshader(char * origname,char * altname)1353 void altshader(char *origname, char *altname)
1354 {
1355     Shader *orig = shaders.access(origname), *alt = shaders.access(altname);
1356     if(!orig || !alt) return;
1357     orig->altshader = alt;
1358     orig->fixdetailshader(false);
1359 }
1360 
fastshader(char * nice,char * fast,int * detail)1361 void fastshader(char *nice, char *fast, int *detail)
1362 {
1363     Shader *ns = shaders.access(nice), *fs = shaders.access(fast);
1364     if(!ns || !fs) return;
1365     loopi(min(*detail+1, MAXSHADERDETAIL)) ns->fastshader[i] = fs;
1366     ns->fixdetailshader(false);
1367 }
1368 
// script bindings for the shader definition functions above; the string is the
// argument signature (presumably 'i' = int and 's' = string, matching each
// function's parameter list - confirm against the COMMAND macro)
COMMAND(shader, "isss");
COMMAND(variantshader, "isiss");
COMMAND(setshader, "s");
COMMAND(altshader, "ss");
COMMAND(fastshader, "ssi");
COMMAND(defershader, "iss");
// forceshader <name>: forwards to useshaderbyname()
ICOMMAND(forceshader, "s", (const char *name), useshaderbyname(name));
1376 
isshaderdefined(char * name)1377 void isshaderdefined(char *name)
1378 {
1379     Shader *s = lookupshaderbyname(name);
1380     intret(s ? 1 : 0);
1381 }
1382 
isshadernative(char * name)1383 void isshadernative(char *name)
1384 {
1385     Shader *s = lookupshaderbyname(name);
1386     intret(s && s->native ? 1 : 0);
1387 }
1388 
// script bindings for the two shader queries above
COMMAND(isshaderdefined, "s");
COMMAND(isshadernative, "s");

// interned shader parameter names: getshaderparamname() stores each name once,
// using the same allocated string as both key and value
static hashtable<const char *, const char *> shaderparamnames(256);
1393 
getshaderparamname(const char * name)1394 const char *getshaderparamname(const char *name)
1395 {
1396     const char **exists = shaderparamnames.access(name);
1397     if(exists) return *exists;
1398     name = newstring(name);
1399     shaderparamnames[name] = name;
1400     return name;
1401 }
1402 
addshaderparam(const char * name,int type,int n,float x,float y,float z,float w)1403 void addshaderparam(const char *name, int type, int n, float x, float y, float z, float w)
1404 {
1405     if((type==SHPARAM_VERTEX || type==SHPARAM_PIXEL) && (n<0 || n>=MAXSHADERPARAMS))
1406     {
1407         conoutf("\frshader param index must be 0..%d\n", MAXSHADERPARAMS-1);
1408         return;
1409     }
1410     if(name) name = getshaderparamname(name);
1411     loopv(curparams)
1412     {
1413         ShaderParam &param = curparams[i];
1414         if(param.type == type && (name ? param.name==name : param.index == n))
1415         {
1416             param.val[0] = x;
1417             param.val[1] = y;
1418             param.val[2] = z;
1419             param.val[3] = w;
1420             return;
1421         }
1422     }
1423     ShaderParam param = {name, type, n, -1, {x, y, z, w}};
1424     curparams.add(param);
1425 }
1426 
// script bindings that all forward to addshaderparam()
// set*param by index: no name is passed, the parameter is identified by its index
ICOMMAND(setvertexparam, "iffff", (int *n, float *x, float *y, float *z, float *w), addshaderparam(NULL, SHPARAM_VERTEX, *n, *x, *y, *z, *w));
ICOMMAND(setpixelparam, "iffff", (int *n, float *x, float *y, float *z, float *w), addshaderparam(NULL, SHPARAM_PIXEL, *n, *x, *y, *z, *w));
// by name only: the index is passed as -1
ICOMMAND(setuniformparam, "sffff", (char *name, float *x, float *y, float *z, float *w), addshaderparam(name, SHPARAM_UNIFORM, -1, *x, *y, *z, *w));
ICOMMAND(setshaderparam, "sffff", (char *name, float *x, float *y, float *z, float *w), addshaderparam(name, SHPARAM_LOOKUP, -1, *x, *y, *z, *w));
// def*param: name plus index; an empty name is passed on as NULL
ICOMMAND(defvertexparam, "siffff", (char *name, int *n, float *x, float *y, float *z, float *w), addshaderparam(name[0] ? name : NULL, SHPARAM_VERTEX, *n, *x, *y, *z, *w));
ICOMMAND(defpixelparam, "siffff", (char *name, int *n, float *x, float *y, float *z, float *w), addshaderparam(name[0] ? name : NULL, SHPARAM_PIXEL, *n, *x, *y, *z, *w));
ICOMMAND(defuniformparam, "sffff", (char *name, float *x, float *y, float *z, float *w), addshaderparam(name, SHPARAM_UNIFORM, -1, *x, *y, *z, *w));
1434 
// number of bind slots a postfx pass can read from or write to
#define NUMPOSTFXBINDS 10

// one intermediate texture used by the postfx chain
struct postfxtex
{
    GLuint id;          // GL texture name
    int scale, used;    // scale: right-shift applied to the screen size; used: bind slot holding it, or -1 if free

    postfxtex() : id(0), scale(0), used(-1) {}
};
vector<postfxtex> postfxtexs;       // pool of intermediate textures, reused across passes
int postfxbinds[NUMPOSTFXBINDS];
GLuint postfxfb = 0;                // framebuffer object used by renderpostfx(), 0 until created
int postfxw = 0, postfxh = 0;       // screen size the texture pool was last set up for
1448 
// one pass of the postfx chain, as registered by addpostfx()
struct postfxpass
{
    Shader *shader;             // shader the pass is drawn with
    vec4 params;                // passed to the shader as its "params" vertex and pixel parameter
    uint inputs, freeinputs;    // bitmasks over bind slots: textures to bind as input / to release after the pass
    int outputbind, outputscale; // bind slot receiving the result / scale of the output texture

    postfxpass() : shader(NULL), inputs(1), freeinputs(1), outputbind(0), outputscale(0) {}
};
vector<postfxpass> postfxpasses;    // the chain, rendered in order by renderpostfx()
1459 
allocatepostfxtex(int scale)1460 static int allocatepostfxtex(int scale)
1461 {
1462     loopv(postfxtexs)
1463     {
1464         postfxtex &t = postfxtexs[i];
1465         if(t.scale==scale && t.used < 0) return i;
1466     }
1467     postfxtex &t = postfxtexs.add();
1468     t.scale = scale;
1469     glGenTextures(1, &t.id);
1470     createtexture(t.id, max(screen->w>>scale, 1), max(screen->h>>scale, 1), NULL, 3, 1, GL_RGB, GL_TEXTURE_RECTANGLE_ARB);
1471     return postfxtexs.length()-1;
1472 }
1473 
cleanuppostfx(bool fullclean)1474 void cleanuppostfx(bool fullclean)
1475 {
1476     if(fullclean && postfxfb)
1477     {
1478         glDeleteFramebuffers_(1, &postfxfb);
1479         postfxfb = 0;
1480     }
1481 
1482     loopv(postfxtexs) glDeleteTextures(1, &postfxtexs[i].id);
1483     postfxtexs.setsize(0);
1484 
1485     postfxw = 0;
1486     postfxh = 0;
1487 }
1488 
// Runs the registered postfx passes over the current frame. The frame is
// copied into a texture bound to slot 0; every pass except the last draws
// into an intermediate texture (via FBO when available, otherwise by copying
// the result back), and the last pass draws to the default framebuffer.
void renderpostfx()
{
	extern int viewtype;
    // nothing to do without passes, when viewtype is non-zero, or on the fixed-function path
    if(postfxpasses.empty() || viewtype || renderpath==R_FIXEDFUNCTION) return;

    // textures are sized from the screen, so drop them all when the size changed
    if(postfxw != screen->w || postfxh != screen->h)
    {
        cleanuppostfx(false);
        postfxw = screen->w;
        postfxh = screen->h;
    }

    // binds[slot] = index into postfxtexs, or -1 when the slot is empty
    int binds[NUMPOSTFXBINDS];
    loopi(NUMPOSTFXBINDS) binds[i] = -1;
    loopv(postfxtexs) postfxtexs[i].used = -1;

    // slot 0 starts out holding a full-size copy of the current frame
    binds[0] = allocatepostfxtex(0);
    postfxtexs[binds[0]].used = 0;
    glBindTexture(GL_TEXTURE_RECTANGLE_ARB, postfxtexs[binds[0]].id);
    glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, 0, 0, screen->w, screen->h);

    // the FBO is only needed when there are intermediate passes
    if(hasFBO && postfxpasses.length() > 1)
    {
        if(!postfxfb) glGenFramebuffers_(1, &postfxfb);
        glBindFramebuffer_(GL_FRAMEBUFFER_EXT, postfxfb);
    }

    setenvparamf("millis", SHPARAM_VERTEX, 1, lastmillis/1000.0f, lastmillis/1000.0f, lastmillis/1000.0f);

    loopv(postfxpasses)
    {
        postfxpass &p = postfxpasses[i];

        // tex stays -1 for the final pass, which renders to the default framebuffer
        int tex = -1;
        if(!postfxpasses.inrange(i+1))
        {
            if(hasFBO && postfxpasses.length() > 1) glBindFramebuffer_(GL_FRAMEBUFFER_EXT, 0);
        }
        else
        {
            tex = allocatepostfxtex(p.outputscale);
            if(hasFBO) glFramebufferTexture2D_(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, postfxtexs[tex].id, 0);
        }

        // output size: the target texture's size, or the screen for the final pass
        int w = tex >= 0 ? max(screen->w>>postfxtexs[tex].scale, 1) : screen->w,
            h = tex >= 0 ? max(screen->h>>postfxtexs[tex].scale, 1) : screen->h;
        glViewport(0, 0, w, h);
        p.shader->set();
        setlocalparamfv("params", SHPARAM_VERTEX, 0, p.params.v);
        setlocalparamfv("params", SHPARAM_PIXEL, 0, p.params.v);
        // bind each requested, non-empty input slot to consecutive texture units;
        // the first one bound determines the texture coordinate range (tw x th)
        int tw = w, th = h, tmu = 0;
        loopj(NUMPOSTFXBINDS) if(p.inputs&(1<<j) && binds[j] >= 0)
        {
            if(!tmu)
            {
                tw = max(screen->w>>postfxtexs[binds[j]].scale, 1);
                th = max(screen->h>>postfxtexs[binds[j]].scale, 1);
            }
            else glActiveTexture_(GL_TEXTURE0_ARB + tmu);
            glBindTexture(GL_TEXTURE_RECTANGLE_ARB, postfxtexs[binds[j]].id);
            ++tmu;
        }
        // leave texture unit 0 active again
        if(tmu) glActiveTexture_(GL_TEXTURE0_ARB);
        // full-viewport quad with texture coordinates spanning 0..tw by 0..th
        glBegin(GL_QUADS);
        glTexCoord2f(0,  0);  glVertex2f(-1, -1);
        glTexCoord2f(tw, 0);  glVertex2f( 1, -1);
        glTexCoord2f(tw, th); glVertex2f( 1,  1);
        glTexCoord2f(0,  th); glVertex2f(-1,  1);
        glEnd();

        // release the inputs this pass was told to free
        loopj(NUMPOSTFXBINDS) if(p.freeinputs&(1<<j) && binds[j] >= 0)
        {
            postfxtexs[binds[j]].used = -1;
            binds[j] = -1;
        }
        if(tex >= 0)
        {
            // the output replaces whatever its bind slot held before
            if(binds[p.outputbind] >= 0) postfxtexs[binds[p.outputbind]].used = -1;
            binds[p.outputbind] = tex;
            postfxtexs[tex].used = p.outputbind;
            // without an FBO the pass was drawn to the current framebuffer; copy it into the texture
            if(!hasFBO)
            {
                glBindTexture(GL_TEXTURE_RECTANGLE_ARB, postfxtexs[tex].id);
                glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, 0, 0, w, h);
            }
        }
    }
}
1577 
addpostfx(const char * name,int outputbind,int outputscale,uint inputs,uint freeinputs,const vec4 & params)1578 static bool addpostfx(const char *name, int outputbind, int outputscale, uint inputs, uint freeinputs, const vec4 &params)
1579 {
1580     if(!hasTR || !*name) return false;
1581     Shader *s = useshaderbyname(name);
1582     if(!s)
1583     {
1584         conoutf("no such postfx shader: %s", name);
1585         return false;
1586     }
1587     postfxpass &p = postfxpasses.add();
1588     p.shader = s;
1589     p.outputbind = outputbind;
1590     p.outputscale = outputscale;
1591     p.inputs = inputs;
1592     p.freeinputs = freeinputs;
1593     p.params = params;
1594     return true;
1595 }
1596 
// Removes every queued postfx pass and then calls cleanuppostfx(false).
// cleanupshaders() uses cleanuppostfx(true) instead, so the flag selects a
// lighter cleanup here - NOTE(review): confirm exactly what 'false' preserves.
void clearpostfx()
{
    postfxpasses.setsize(0);
    cleanuppostfx(false);
}

// expose clearpostfx as a console command taking no arguments
COMMAND(clearpostfx, "");
1604 
1605 ICOMMAND(addpostfx, "siisffff", (char *name, int *bind, int *scale, char *inputs, float *x, float *y, float *z, float *w),
1606 {
1607     int inputmask = inputs[0] ? 0 : 1;
1608     int freemask = inputs[0] ? 0 : 1;
1609     bool freeinputs = true;
1610     for(; *inputs; inputs++) if(isdigit(*inputs))
1611     {
1612         inputmask |= 1<<(*inputs-'0');
1613         if(freeinputs) freemask |= 1<<(*inputs-'0');
1614     }
1615     else if(*inputs=='+') freeinputs = false;
1616     else if(*inputs=='-') freeinputs = true;
1617     inputmask &= (1<<NUMPOSTFXBINDS)-1;
1618     freemask &= (1<<NUMPOSTFXBINDS)-1;
1619     addpostfx(name, clamp(*bind, 0, NUMPOSTFXBINDS-1), max(*scale, 0), inputmask, freemask, vec4(*x, *y, *z, *w));
1620 });
1621 
1622 ICOMMAND(setpostfx, "sffff", (char *name, float *x, float *y, float *z, float *w),
1623 {
1624     clearpostfx();
1625     addpostfx(name, 0, 0, 1, 1, vec4(*x, *y, *z, *w));
1626 });
1627 
1628 struct tmufunc
1629 {
1630     GLenum combine, sources[4], ops[4];
1631     int scale;
1632 };
1633 
1634 struct tmu
1635 {
1636     GLenum mode;
1637     GLfloat color[4];
1638     tmufunc rgb, alpha;
1639 };
1640 
// Aggregate initialiser for a tmu whose cached state is "unknown".
// It is the initial value of every tmus[] entry and is written back by
// cleanupshaders(). committmu() only issues GL calls for fields that differ
// from the cache, so these values are presumably chosen to never match a
// real state (mode 0, colour -1, scale 0) - NOTE(review): confirm intent.
#define INVALIDTMU \
{ \
    0, \
    { -1, -1, -1, -1 }, \
    { 0, { 0, 0, 0, ~0 }, { 0, 0, 0, 0 }, 0 }, \
    { 0, { 0, 0, 0, ~0 }, { 0, 0, 0, 0 }, 0 } \
}
1648 
// Aggregate initialiser for a tmu with default combiner settings:
// GL_MODULATE mode, zero env colour, and for both channels a GL_MODULATE
// function over (texture, previous, constant, zero) at scale 1.
// setuptmu() copies its rgb/alpha members when the caller passes no function string.
#define INITTMU \
{ \
    GL_MODULATE, \
    { 0, 0, 0, 0 }, \
    { GL_MODULATE, { GL_TEXTURE, GL_PREVIOUS_ARB, GL_CONSTANT_ARB, GL_ZERO }, { GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_COLOR }, 1 }, \
    { GL_MODULATE, { GL_TEXTURE, GL_PREVIOUS_ARB, GL_CONSTANT_ARB, GL_ZERO }, { GL_SRC_ALPHA, GL_SRC_ALPHA, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA }, 1 } \
}
1656 
// upper bound on the number of texture units whose state is cached in tmus[]
#define MAXTMUS 8

// Cached texture-environment state per texture unit; every entry starts out
// as INVALIDTMU. The initialiser list must contain exactly MAXTMUS entries.
tmu tmus[MAXTMUS] =
{
	INVALIDTMU,
	INVALIDTMU,
	INVALIDTMU,
    INVALIDTMU,
    INVALIDTMU,
    INVALIDTMU,
    INVALIDTMU,
	INVALIDTMU
};

// number of texture units actually managed; set by inittmus() to a value in [1, MAXTMUS]
VAR(maxtmus, 1, 0, 0);
1672 
parsetmufunc(tmu & t,tmufunc & f,const char * s)1673 void parsetmufunc(tmu &t, tmufunc &f, const char *s)
1674 {
1675     int arg = -1;
1676     while(*s) switch(*s++)
1677     {
1678         case 'T':
1679         case 't': f.sources[++arg] = GL_TEXTURE; f.ops[arg] = GL_SRC_COLOR; break;
1680         case 'P':
1681         case 'p': f.sources[++arg] = GL_PREVIOUS_ARB; f.ops[arg] = GL_SRC_COLOR; break;
1682         case 'K':
1683         case 'k': f.sources[++arg] = GL_CONSTANT_ARB; f.ops[arg] = GL_SRC_COLOR; break;
1684         case 'C':
1685         case 'c': f.sources[++arg] = GL_PRIMARY_COLOR_ARB; f.ops[arg] = GL_SRC_COLOR; break;
1686         case '~': f.ops[arg] = GL_ONE_MINUS_SRC_COLOR; break;
1687         case 'A':
1688         case 'a': f.ops[arg] = f.ops[arg]==GL_ONE_MINUS_SRC_COLOR ? GL_ONE_MINUS_SRC_ALPHA : GL_SRC_ALPHA; break;
1689         case '=': f.combine = GL_REPLACE; break;
1690         case '*': f.combine = GL_MODULATE; break;
1691         case '+': f.combine = GL_ADD; break;
1692         case '-': f.combine = GL_SUBTRACT_ARB; break;
1693         case ',':
1694         case '@': f.combine = GL_INTERPOLATE_ARB; break;
1695         case 'X':
1696         case 'x': while(!isdigit(*s)) s++; f.scale = *s++-'0'; break;
1697         // EXT_texture_env_dot3
1698         case '.': f.combine = GL_DOT3_RGB_ARB; break;
1699         // ATI_texture_env_combine3
1700         case '3': f.combine = GL_MODULATE_ADD_ATI; break;
1701         // NV_texture_env_combine4
1702         case '4': t.mode = GL_COMBINE4_NV; f.combine = GL_ADD; break;
1703         case '0': f.sources[++arg] = GL_ZERO; f.ops[arg] = GL_SRC_COLOR; break;
1704         case '1': f.sources[++arg] = GL_ZERO; f.ops[arg] = GL_ONE_MINUS_SRC_COLOR; break;
1705     }
1706 }
1707 
committmufunc(GLenum mode,bool rgb,tmufunc & dst,tmufunc & src)1708 void committmufunc(GLenum mode, bool rgb, tmufunc &dst, tmufunc &src)
1709 {
1710     if(dst.combine!=src.combine) glTexEnvi(GL_TEXTURE_ENV, rgb ? GL_COMBINE_RGB_ARB : GL_COMBINE_ALPHA_ARB, src.combine);
1711     loopi(3)
1712     {
1713         if(dst.sources[i]!=src.sources[i]) glTexEnvi(GL_TEXTURE_ENV, (rgb ? GL_SOURCE0_RGB_ARB : GL_SOURCE0_ALPHA_ARB)+i, src.sources[i]);
1714         if(dst.ops[i]!=src.ops[i]) glTexEnvi(GL_TEXTURE_ENV, (rgb ? GL_OPERAND0_RGB_ARB : GL_OPERAND0_ALPHA_ARB)+i, src.ops[i]);
1715     }
1716     if(mode==GL_COMBINE4_NV)
1717     {
1718         if(dst.sources[3]!=src.sources[3]) glTexEnvi(GL_TEXTURE_ENV, rgb ? GL_SOURCE3_RGB_NV : GL_SOURCE3_ALPHA_NV, src.sources[3]);
1719         if(dst.ops[3]!=src.ops[3]) glTexEnvi(GL_TEXTURE_ENV, rgb ? GL_OPERAND3_RGB_NV : GL_OPERAND3_ALPHA_NV, src.ops[3]);
1720     }
1721     if(dst.scale!=src.scale) glTexEnvi(GL_TEXTURE_ENV, rgb ? GL_RGB_SCALE_ARB : GL_ALPHA_SCALE, src.scale);
1722 }
1723 
committmu(int n,tmu & f)1724 void committmu(int n, tmu &f)
1725 {
1726     if(renderpath!=R_FIXEDFUNCTION || n>=maxtmus) return;
1727     if(tmus[n].mode!=f.mode) glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, f.mode);
1728     if(memcmp(tmus[n].color, f.color, sizeof(f.color))) glTexEnvfv(GL_TEXTURE_ENV, GL_TEXTURE_ENV_COLOR, f.color);
1729     committmufunc(f.mode, true, tmus[n].rgb, f.rgb);
1730     committmufunc(f.mode, false, tmus[n].alpha, f.alpha);
1731     tmus[n] = f;
1732 }
1733 
resettmu(int n)1734 void resettmu(int n)
1735 {
1736 	tmu f = tmus[n];
1737 	f.mode = GL_MODULATE;
1738 	f.rgb.scale = 1;
1739 	f.alpha.scale = 1;
1740 	committmu(n, f);
1741 }
1742 
scaletmu(int n,int rgbscale,int alphascale)1743 void scaletmu(int n, int rgbscale, int alphascale)
1744 {
1745 	tmu f = tmus[n];
1746 	if(rgbscale) f.rgb.scale = rgbscale;
1747 	if(alphascale) f.alpha.scale = alphascale;
1748 	committmu(n, f);
1749 }
1750 
colortmu(int n,float r,float g,float b,float a)1751 void colortmu(int n, float r, float g, float b, float a)
1752 {
1753 	tmu f = tmus[n];
1754 	f.color[0] = r;
1755 	f.color[1] = g;
1756 	f.color[2] = b;
1757 	f.color[3] = a;
1758 	committmu(n, f);
1759 }
1760 
setuptmu(int n,const char * rgbfunc,const char * alphafunc)1761 void setuptmu(int n, const char *rgbfunc, const char *alphafunc)
1762 {
1763 	static tmu init = INITTMU;
1764 	tmu f = tmus[n];
1765 
1766 	f.mode = GL_COMBINE_ARB;
1767 	if(rgbfunc) parsetmufunc(f, f.rgb, rgbfunc);
1768 	else f.rgb = init.rgb;
1769 	if(alphafunc) parsetmufunc(f, f.alpha, alphafunc);
1770 	else f.alpha = init.alpha;
1771 
1772 	committmu(n, f);
1773 }
1774 
// Feature-disable flags. inittmus() sets all three to 1 when the
// fixed-function render path has fewer than two texture units available.
VAR(nolights, 1, 0, 0);
VAR(nowater, 1, 0, 0);
VAR(nomasks, 1, 0, 0);
1778 
inittmus()1779 void inittmus()
1780 {
1781 	if(hasTE && hasMT)
1782 	{
1783         GLint val;
1784 		glGetIntegerv(GL_MAX_TEXTURE_UNITS_ARB, &val);
1785         maxtmus = max(1, min(MAXTMUS, int(val)));
1786 		loopi(maxtmus)
1787 		{
1788 			glActiveTexture_(GL_TEXTURE0_ARB+i);
1789 			resettmu(i);
1790 		}
1791 		glActiveTexture_(GL_TEXTURE0_ARB);
1792 	}
1793     else if(hasTE) { maxtmus = 1; resettmu(0); }
1794     if(renderpath==R_FIXEDFUNCTION)
1795     {
1796         if(maxtmus<4) caustics = 0;
1797         if(maxtmus<2)
1798 		{
1799 			nolights = nowater = nomasks = 1;
1800 			extern int lightmodels;
1801 			lightmodels = 0;
1802             refractfog = 0;
1803         }
1804 	}
1805 }
1806 
cleanupshaders()1807 void cleanupshaders()
1808 {
1809     cleanuppostfx(true);
1810 
1811     defaultshader = notextureshader = nocolorshader = foggedshader = foggednotextureshader = NULL;
1812     enumerate(shaders, Shader, s, s.cleanup());
1813     Shader::lastshader = NULL;
1814     if(renderpath!=R_FIXEDFUNCTION)
1815     {
1816         glBindProgram_(GL_VERTEX_PROGRAM_ARB, 0);
1817         glBindProgram_(GL_FRAGMENT_PROGRAM_ARB, 0);
1818         glDisable(GL_VERTEX_PROGRAM_ARB);
1819         glDisable(GL_FRAGMENT_PROGRAM_ARB);
1820         if(renderpath==R_GLSLANG) glUseProgramObject_(0);
1821     }
1822     loopi(RESERVEDSHADERPARAMS + MAXSHADERPARAMS)
1823     {
1824         vertexparamstate[i].dirty = ShaderParamState::INVALID;
1825         pixelparamstate[i].dirty = ShaderParamState::INVALID;
1826     }
1827 
1828     tmu invalidtmu = INVALIDTMU;
1829     loopi(MAXTMUS) tmus[i] = invalidtmu;
1830 }
1831 
reloadshaders()1832 void reloadshaders()
1833 {
1834     persistidents = false;
1835     loadshaders();
1836     persistidents = true;
1837     if(renderpath==R_FIXEDFUNCTION) return;
1838     linkslotshaders();
1839     enumerate(shaders, Shader, s,
1840     {
1841         if(!s.standard && !(s.type&(SHADER_DEFERRED|SHADER_INVALID)) && !s.variantshader)
1842         {
1843             defformatstring(info)("shader %s", s.name);
1844             progress(0.0, info);
1845             if(!s.compile()) s.cleanup(true);
1846             loopi(MAXVARIANTROWS) loopvj(s.variants[i])
1847             {
1848                 Shader *v = s.variants[i][j];
1849                 if((v->reusevs && v->reusevs->type&SHADER_INVALID) ||
1850                    (v->reuseps && v->reuseps->type&SHADER_INVALID) ||
1851                    !v->compile())
1852                     v->cleanup(true);
1853             }
1854         }
1855         if(s.forced && !s.detailshader) s.fixdetailshader();
1856     });
1857 }
1858 
setupblurkernel(int radius,float sigma,float * weights,float * offsets)1859 void setupblurkernel(int radius, float sigma, float *weights, float *offsets)
1860 {
1861     if(radius<1 || radius>MAXBLURRADIUS) return;
1862     sigma *= 2*radius;
1863     float total = 1.0f/sigma;
1864     weights[0] = total;
1865     offsets[0] = 0;
1866     // rely on bilinear filtering to sample 2 pixels at once
1867     // transforms a*X + b*Y into (u+v)*[X*u/(u+v) + Y*(1 - u/(u+v))]
1868     loopi(radius)
1869     {
1870         float weight1 = exp(-((2*i)*(2*i)) / (2*sigma*sigma)) / sigma,
1871               weight2 = exp(-((2*i+1)*(2*i+1)) / (2*sigma*sigma)) / sigma,
1872               scale = weight1 + weight2,
1873               offset = 2*i+1 + weight2 / scale;
1874         weights[i+1] = scale;
1875         offsets[i+1] = offset;
1876         total += 2*scale;
1877     }
1878     loopi(radius+1) weights[i] /= total;
1879     for(int i = radius+1; i <= MAXBLURRADIUS; i++) weights[i] = offsets[i] = 0;
1880 }
1881 
setblurshader(int pass,int size,int radius,float * weights,float * offsets,GLenum target)1882 void setblurshader(int pass, int size, int radius, float *weights, float *offsets, GLenum target)
1883 {
1884     if(radius<1 || radius>MAXBLURRADIUS) return;
1885     static Shader *blurshader[7][2] = { { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL } },
1886                   *blurrectshader[7][2] = { { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL }, { NULL, NULL } };
1887     Shader *&s = (target == GL_TEXTURE_RECTANGLE_ARB ? blurrectshader : blurshader)[radius-1][pass];
1888     if(!s)
1889     {
1890         defformatstring(name)("blur%c%d%s", 'x'+pass, radius, target == GL_TEXTURE_RECTANGLE_ARB ? "rect" : "");
1891         s = lookupshaderbyname(name);
1892     }
1893     s->set();
1894     setlocalparamfv("weights", SHPARAM_PIXEL, 0, weights);
1895     setlocalparamfv("weights2", SHPARAM_PIXEL, 2, &weights[4]);
1896     setlocalparamf("offsets", SHPARAM_VERTEX, 1,
1897         pass==0 ? offsets[1]/size : offsets[0]/size,
1898         pass==1 ? offsets[1]/size : offsets[0]/size,
1899         (offsets[2] - offsets[1])/size,
1900         (offsets[3] - offsets[2])/size);
1901     loopk(4)
1902     {
1903         static const char *names[4] = { "offset4", "offset5", "offset6", "offset7" };
1904         setlocalparamf(names[k], SHPARAM_PIXEL, 3+k,
1905             pass==0 ? offsets[4+k]/size : offsets[0]/size,
1906             pass==1 ? offsets[4+k]/size : offsets[0]/size,
1907             0, 0);
1908     }
1909 }
1910 
1911