1 ////////////////////////////////////////////////////////////////////////////
2 //	File:		ProgramGLSL.cpp
3 //	Author:		Changchang Wu
4 //	Description : GLSL related classes
5 //		class ProgramGLSL		A simple wrapper of GLSL programs
6 //		class ShaderBagGLSL		GLSL shaders for SIFT
7 //		class FilterGLSL		GLSL gaussian filters for SIFT
8 //
9 //	Copyright (c) 2007 University of North Carolina at Chapel Hill
10 //	All Rights Reserved
11 //
12 //	Permission to use, copy, modify and distribute this software and its
13 //	documentation for educational, research and non-profit purposes, without
14 //	fee, and without a written agreement is hereby granted, provided that the
15 //	above copyright notice and the following paragraph appear in all copies.
16 //
17 //	The University of North Carolina at Chapel Hill make no representations
18 //	about the suitability of this software for any purpose. It is provided
19 //	'as is' without express or implied warranty.
20 //
21 //	Please send BUG REPORTS to ccwu@cs.unc.edu
22 //
23 ////////////////////////////////////////////////////////////////////////////
24 
25 
26 #include "GL/glew.h"
27 #include <string.h>
28 #include <stdio.h>
29 #include <iomanip>
30 #include <iostream>
31 #include <sstream>
32 #include <vector>
33 #include <algorithm>
34 #include <math.h>
35 using namespace std;
36 
37 #include "GlobalUtil.h"
38 #include "ProgramGLSL.h"
39 #include "GLTexImage.h"
40 #include "ShaderMan.h"
41 #include "SiftGPU.h"
42 
ShaderObject(int shadertype,const char * source,int filesource)43 ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource)
44 {
45 
46 
47 	_type = shadertype;
48 	_compiled = 0;
49 
50 
51 	_shaderID = glCreateShader(shadertype);
52 	if(_shaderID == 0) return;
53 
54 	if(source)
55 	{
56 
57 		GLint				code_length;
58 		if(filesource ==0)
59 		{
60 			const char* code  = source;
61 			code_length = (GLint) strlen(code);
62 			glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
63 		}else
64 		{
65 			char * code;
66 			if((code_length= ReadShaderFile(source, code)) ==0) return;
67 			glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
68 			delete code;
69 		}
70 
71 		glCompileShader(_shaderID);
72 
73 		CheckCompileLog();
74 
75 		if(!_compiled) 		std::cout << source;
76 	}
77 
78 
79 
80 
81 }
82 
ReadShaderFile(const char * sourcefile,char * & code)83 int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile,  char*& code )
84 {
85 	code = NULL;
86 	FILE * file;
87 	int    len=0;
88 
89 	if(sourcefile == NULL) return 0;
90 
91 	file = fopen(sourcefile,"rt");
92 	if(file == NULL) return 0;
93 
94 
95 	fseek(file, 0, SEEK_END);
96 	len = ftell(file);
97 	rewind(file);
98 	if(len >1)
99 	{
100 		code = new  char[len+1];
101 		fread(code, sizeof( char), len, file);
102 		code[len] = 0;
103 	}else
104 	{
105 		len = 0;
106 	}
107 
108 	fclose(file);
109 
110 	return len;
111 
112 }
113 
CheckCompileLog()114 void ProgramGLSL::ShaderObject::CheckCompileLog()
115 {
116 
117 	GLint status;
118 	glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status);
119 	_compiled = (status ==GL_TRUE);
120 
121 	if(_compiled == 0)	PrintCompileLog(std::cout);
122 
123 
124 }
125 
~ShaderObject()126 ProgramGLSL::ShaderObject::~ShaderObject()
127 {
128 	if(_shaderID)	glDeleteShader(_shaderID);
129 
130 }
131 
IsValidFragmentShader()132 int ProgramGLSL::ShaderObject::IsValidFragmentShader()
133 {
134 	return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;
135 }
136 
IsValidVertexShader()137 int  ProgramGLSL::ShaderObject::IsValidVertexShader()
138 {
139 	return _type == GL_VERTEX_SHADER && _shaderID && _compiled;
140 }
141 
142 
PrintCompileLog(ostream & os)143 void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)
144 {
145 	GLint len = 0;
146 
147 	glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len);
148 	if(len <=1) return;
149 
150 	char * compileLog = new char[len+1];
151 	if(compileLog == NULL) return;
152 
153 	glGetShaderInfoLog(_shaderID, len, &len, compileLog);
154 
155 
156 	os<<"Compile Log\n"<<compileLog<<"\n";
157 
158 	delete[] compileLog;
159 }
160 
161 
ProgramGLSL()162 ProgramGLSL::ProgramGLSL()
163 {
164 	_linked = 0;
165 	_TextureParam0 = -1;
166 	_programID = glCreateProgram();
167 }
~ProgramGLSL()168 ProgramGLSL::~ProgramGLSL()
169 {
170 	if(_programID)glDeleteProgram(_programID);
171 }
AttachShaderObject(ShaderObject & shader)172 void ProgramGLSL::AttachShaderObject(ShaderObject &shader)
173 {
174 	if(_programID  && shader.IsValidShaderObject())
175 		glAttachShader(_programID, shader.GetShaderID());
176 }
DetachShaderObject(ShaderObject & shader)177 void ProgramGLSL::DetachShaderObject(ShaderObject &shader)
178 {
179 	if(_programID  && shader.IsValidShaderObject())
180 		glDetachShader(_programID, shader.GetShaderID());
181 }
LinkProgram()182 int ProgramGLSL::LinkProgram()
183 {
184 	_linked = 0;
185 
186 	if(_programID==0) return 0;
187 
188 	glLinkProgram(_programID);
189 
190 	CheckLinkLog();
191 
192 //	GlobalUtil::StartTimer("100 link test");
193 //	for(int i = 0; i<100; i++) glLinkProgram(_programID);
194 //	GlobalUtil::StopTimer();
195 
196 	return _linked;
197 }
198 
CheckLinkLog()199 void ProgramGLSL::CheckLinkLog()
200 {
201 	GLint status;
202 	glGetProgramiv(_programID, GL_LINK_STATUS, &status);
203 
204 	_linked = (status == GL_TRUE);
205 
206 }
207 
208 
ValidateProgram()209 int ProgramGLSL::ValidateProgram()
210 {
211 	if(_programID && _linked)
212 	{
213 ///		GLint status;
214 //		glValidateProgram(_programID);
215 //		glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);
216 //		return status == GL_TRUE;
217 		return 1;
218 	}
219 	else
220 		return 0;
221 }
222 
PrintLinkLog(std::ostream & os)223 void ProgramGLSL::PrintLinkLog(std::ostream &os)
224 {
225 	GLint len = 0;
226 
227 	glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len);
228 	if(len <=1) return;
229 
230 	char* linkLog = new char[len+1];
231 	if(linkLog == NULL) return;
232 
233 	glGetProgramInfoLog(_programID, len, &len, linkLog);
234 
235 	linkLog[len] = 0;
236 
237 	if(strstr(linkLog, "failed"))
238 	{
239 		os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n";
240 		_linked = 0;
241 	}
242 
243 	delete[] linkLog;
244 }
245 
UseProgram()246 int ProgramGLSL::UseProgram()
247 {
248 	if(ValidateProgram())
249 	{
250 		glUseProgram(_programID);
251 		if (_TextureParam0 >= 0) glUniform1i(_TextureParam0, 0);
252 		return true;
253 	}
254 	else
255 	{
256 		return false;
257 	}
258 }
259 
260 
ProgramGLSL(const char * frag_source)261 ProgramGLSL::ProgramGLSL(const char *frag_source)
262 {
263 	_linked = 0;
264 	_programID = glCreateProgram();
265 	_TextureParam0 = -1;
266 	ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
267 
268 	if(shader.IsValidFragmentShader())
269 	{
270 		AttachShaderObject(shader);
271 		LinkProgram();
272 
273 		if(!_linked)
274 		{
275 			//shader.PrintCompileLog(std::cout);
276 			PrintLinkLog(std::cout);
277 		} else
278 		{
279 			_TextureParam0 = glGetUniformLocation(_programID, "tex");
280 		}
281 	}else
282 	{
283 		_linked = 0;
284 	}
285 
286 }
287 
288 /*
289 ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)
290 {
291 	_used = 0;
292 	_linked = 0;
293 	_programID = glCreateProgram();
294 	ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
295 	ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);
296 	AttachShaderObject(shader);
297 	AttachShaderObject(vertex_shader);
298 	LinkProgram();
299 	if(!_linked)
300 	{
301 		shader.PrintCompileLog(std::cout);
302 		vertex_shader.PrintCompileLog(std::cout);
303 		PrintLinkLog(std::cout);
304 		std::cout<<vert_source;
305 		std::cout<<frag_source;
306 	}
307 
308 }
309 */
310 
311 
312 
ReLink()313 void ProgramGLSL::ReLink()
314 {
315 	glLinkProgram(_programID);
316 }
317 
IsNative()318 int ProgramGLSL::IsNative()
319 {
320 	return _linked;
321 }
322 
FilterGLSL(float sigma)323 FilterGLSL::FilterGLSL(float sigma)
324 {
325 	//pixel inside 3*sigma box
326 	int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma -0.5) ) ;//
327 	int width = 2*sz + 1;
328 
329 	//filter size truncation
330 	if(GlobalUtil::_MaxFilterWidth >0 && width > GlobalUtil::_MaxFilterWidth)
331 	{
332 		std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl;
333 		sz = GlobalUtil::_MaxFilterWidth>>1;
334 		width = 2 * sz + 1;
335 	}
336 
337 	int i;
338 	float * kernel = new float[width];
339 	float   rv = 1.0f/(sigma*sigma);
340 	float   v, ksum =0;
341 
342 	// pre-compute filter
343 	for( i = -sz ; i <= sz ; ++i)
344 	{
345 		kernel[i+sz] =  v = exp(-0.5f * i * i *rv) ;
346 		ksum += v;
347 	}
348 
349 	//normalize the kernel
350 	rv = 1.0f / ksum;
351 	for(i = 0; i< width ;i++) kernel[i]*=rv;
352 	//
353 
354     MakeFilterProgram(kernel, width);
355 
356 	_size = sz;
357 
358 	delete[] kernel;
359     if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl;
360 }
361 
362 
MakeFilterProgram(float kernel[],int width)363 void FilterGLSL::MakeFilterProgram(float kernel[], int width)
364 {
365 	if(GlobalUtil::_usePackedTex)
366 	{
367 		s_shader_h = CreateFilterHPK(kernel, width);
368 		s_shader_v = CreateFilterVPK(kernel, width);
369 	}else
370 	{
371 		s_shader_h = CreateFilterH(kernel, width);
372 		s_shader_v = CreateFilterV(kernel, width);
373 	}
374 }
375 
CreateFilterH(float kernel[],int width)376 ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width)
377 {
378 	ostringstream out;
379 	out<<setprecision(8);
380 
381 	out<<  "uniform sampler2DRect tex;";
382 	out<< "\nvoid main(void){ float intensity = 0.0 ;  vec2 pos;\n";
383 
384     int half_width = width / 2;
385 	for(int i = 0; i< width; i++)
386 	{
387 		if(i == half_width)
388 		{
389 
390 			out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";
391 			out<<"intensity+= or * "<<kernel[i]<<";\n";
392 		}else
393 		{
394 			out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i - half_width) <<") , 0);\n";
395 			out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n";
396 		}
397 	}
398 
399 	//copy original data to red channel
400 	out<<"gl_FragColor.r = or;\n";
401 	out<<"gl_FragColor.b  = intensity;}\n"<<'\0';
402 
403 	return new ProgramGLSL(out.str().c_str());
404 }
405 
406 
CreateFilterV(float kernel[],int height)407 ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height)
408 {
409 	ostringstream out;
410 	out<<setprecision(8);
411 
412 	out<<  "uniform sampler2DRect tex;";
413 	out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";
414     int half_height = height / 2;
415 	for(int i = 0; i< height; i++)
416 	{
417 
418 		if(i == half_height)
419 		{
420 			out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";
421 			out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
422 
423 		}else
424 		{
425 			out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i - half_height) <<") );\n";
426 			out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n";
427 		}
428 
429 	}
430 
431 	out<<"gl_FragColor.b = orb.y;\n";
432 	out<<"gl_FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
433 	out<<"gl_FragColor.r = intensity;}\n"<<'\0';
434 
435 //	std::cout<<buffer<<endl;
436 	return new ProgramGLSL(out.str().c_str());
437 }
438 
439 
440 
CreateFilterHPK(float kernel[],int width)441 ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width)
442 {
443 	//both h and v are packed...
444 	int i, j , xw, xwn;
445 
446 	int halfwidth  = width >>1;
447 	float * pf = kernel + halfwidth;
448 	int nhpixel = (halfwidth+1)>>1;	//how many neighbour pixels need to be looked up
449 	int npixel  = (nhpixel<<1)+1;//
450 	float weight[3];
451 	ostringstream out;;
452 	out<<setprecision(8);
453 
454 	out<<  "uniform sampler2DRect tex;";
455 	out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
456 	///use multi texture coordinate because nhpixels can be at most 3
457 	out<<"vec4 pc; vec2 coord; \n";
458 	for( i = 0 ; i < npixel ; i++)
459 	{
460 		out<<"coord = gl_TexCoord[0].xy + vec2(float("<<i-nhpixel<<"),0);\n";
461 		out<<"pc=texture2DRect(tex, coord);\n";
462 		if(GlobalUtil::_PreciseBorder)		out<<"if(coord.x < 0.0) pc = pc.rrbb;\n";
463 		//for each sub-pixel j  in center, the weight of sub-pixel k
464 		xw = (i - nhpixel)*2;
465 		for( j = 0; j < 3; j++)
466 		{
467 			xwn = xw  + j  -1;
468 			weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
469 		}
470 		if(weight[1] == 0.0)
471 		{
472 			out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
473 		}
474 		else
475 		{
476 			out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
477 			out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
478 		}
479 
480 	}
481 	out<<"gl_FragColor = result;}\n"<<'\0';
482 
483 	return new ProgramGLSL(out.str().c_str());
484 
485 
486 }
487 
488 
CreateFilterVPK(float kernel[],int height)489 ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height)
490 {
491 
492 	//both h and v are packed...
493 	int i, j, yw, ywn;
494 
495 	int halfh  = height >>1;
496 	float * pf = kernel + halfh;
497 	int nhpixel = (halfh+1)>>1;	//how many neighbour pixels need to be looked up
498 	int npixel  = (nhpixel<<1)+1;//
499 	float weight[3];
500 	ostringstream out;;
501 	out<<setprecision(8);
502 
503 	out<<  "uniform sampler2DRect tex;";
504 	out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
505 	///use multi texture coordinate because nhpixels can be at most 3
506 	out<<"vec4 pc; vec2 coord;\n";
507 	for( i = 0 ; i < npixel ; i++)
508 	{
509 		out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<i-nhpixel<<"));\n";
510 		out<<"pc=texture2DRect(tex, coord);\n";
511 		if(GlobalUtil::_PreciseBorder)	out<<"if(coord.y < 0.0) pc = pc.rgrg;\n";
512 
513 		//for each sub-pixel j  in center, the weight of sub-pixel k
514 		yw = (i - nhpixel)*2;
515 		for( j = 0; j < 3; j++)
516 		{
517 			ywn = yw + j  -1;
518 			weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
519 		}
520 		if(weight[1] == 0.0)
521 		{
522 			out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
523 		}else
524 		{
525 			out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
526 			out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
527 		}
528 	}
529 	out<<"gl_FragColor = result;}\n"<<'\0';
530 
531 	return new ProgramGLSL(out.str().c_str());
532 }
533 
534 
535 
ShaderBag()536 ShaderBag::ShaderBag()
537 {
538 	s_debug = 0;
539 	s_orientation = 0;
540 	s_display_gaussian = 0;
541 	s_display_dog = 0;
542 	s_display_grad = 0;
543 	s_display_keys = 0;
544 	s_sampling = 0;
545 	s_grad_pass = 0;
546 	s_dog_pass = 0;
547 	s_keypoint = 0;
548 	s_genlist_init_tight = 0;
549 	s_genlist_init_ex = 0;
550 	s_genlist_histo = 0;
551 	s_genlist_start = 0;
552 	s_genlist_step = 0;
553 	s_genlist_end = 0;
554 	s_vertex_list = 0;
555 	s_descriptor_fp = 0;
556 	s_margin_copy = 0;
557     ////////////
558     f_gaussian_skip0 = NULL;
559     f_gaussian_skip1 = NULL;
560     f_gaussian_step = NULL;
561     _gaussian_step_num = 0;
562 
563 }
564 
~ShaderBag()565 ShaderBag::~ShaderBag()
566 {
567 	if(s_debug)delete s_debug;
568 	if(s_orientation)delete s_orientation;
569 	if(s_display_gaussian)delete s_display_gaussian;
570 	if(s_display_dog)delete s_display_dog;
571 	if(s_display_grad)delete s_display_grad;
572 	if(s_display_keys)delete s_display_keys;
573 	if(s_sampling)delete s_sampling;
574 	if(s_grad_pass)delete s_grad_pass;
575 	if(s_dog_pass) delete s_dog_pass;
576 	if(s_keypoint)delete s_keypoint;
577 	if(s_genlist_init_tight)delete s_genlist_init_tight;
578 	if(s_genlist_init_ex)delete s_genlist_init_ex;
579 	if(s_genlist_histo)delete s_genlist_histo;
580 	if(s_genlist_start)delete s_genlist_start;
581 	if(s_genlist_step)delete s_genlist_step;
582 	if(s_genlist_end)delete s_genlist_end;
583 	if(s_vertex_list)delete s_vertex_list;
584 	if(s_descriptor_fp)delete s_descriptor_fp;
585 	if(s_margin_copy) delete s_margin_copy;
586 
587     //////////////////////////////////////////////
588     if(f_gaussian_skip1) delete f_gaussian_skip1;
589 
590     for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
591     {
592 	    if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i];
593     }
594     if(f_gaussian_step && _gaussian_step_num > 0)
595     {
596 	    for(int i = 0; i< _gaussian_step_num; i++)
597 	    {
598 		    delete f_gaussian_step[i];
599 	    }
600 	    delete[] f_gaussian_step;
601     }
602 }
603 
604 
SelectInitialSmoothingFilter(int octave_min,SiftParam & param)605 void ShaderBag::SelectInitialSmoothingFilter(int octave_min, SiftParam&param)
606 {
607     float sigma = param.GetInitialSmoothSigma(octave_min);
608     if(sigma == 0)
609     {
610        f_gaussian_skip0 = NULL;
611     }else
612     {
613 	    for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
614 	    {
615 		    if(f_gaussian_skip0_v[i]->_id == octave_min)
616 		    {
617 			    f_gaussian_skip0 = f_gaussian_skip0_v[i];
618 			    return ;
619 		    }
620 	    }
621 	    FilterGLSL * filter = new FilterGLSL(sigma);
622 	    filter->_id = octave_min;
623 	    f_gaussian_skip0_v.push_back(filter);
624 	    f_gaussian_skip0 = filter;
625     }
626 }
627 
CreateGaussianFilters(SiftParam & param)628 void ShaderBag::CreateGaussianFilters(SiftParam&param)
629 {
630 	if(param._sigma_skip0>0.0f)
631 	{
632         FilterGLSL * filter;
633 		f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);
634 		filter->_id = GlobalUtil::_octave_min_default;
635 		f_gaussian_skip0_v.push_back(filter);
636 	}
637 	if(param._sigma_skip1>0.0f)
638 	{
639 		f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);
640 	}
641 
642 	f_gaussian_step = new FilterProgram*[param._sigma_num];
643 	for(int i = 0; i< param._sigma_num; i++)
644 	{
645 		f_gaussian_step[i] =  new FilterGLSL(param._sigma[i]);
646 	}
647     _gaussian_step_num = param._sigma_num;
648 }
649 
650 
LoadDynamicShaders(SiftParam & param)651 void ShaderBag::LoadDynamicShaders(SiftParam& param)
652 {
653     LoadKeypointShader(param._dog_threshold, param._edge_threshold);
654     LoadGenListShader(param._dog_level_num, 0);
655     CreateGaussianFilters(param);
656 }
657 
658 
LoadFixedShaders()659 void ShaderBagGLSL::LoadFixedShaders()
660 {
661 
662 
663 	s_gray = new ProgramGLSL(
664 		"uniform sampler2DRect tex; void main(void){\n"
665 		"float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex, gl_TexCoord[0].st ).rgb);\n"
666 		"gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");
667 
668 
669 	s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg =  gl_TexCoord[0].st;}");
670 
671 
672 	s_sampling = new ProgramGLSL(
673 		"uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");
674 
675 	//
676 	s_grad_pass = new ProgramGLSL(
677 	"uniform sampler2DRect tex; void main ()\n"
678 	"{\n"
679 	"	vec4 v1, v2, gg;\n"
680 	"	vec4 cc  = texture2DRect(tex, gl_TexCoord[0].xy);\n"
681 	"	gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"
682 	"	gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"
683 	"	gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"
684 	"	gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"
685 	"	vec2 dxdy = (gg.yw - gg.xz); \n"
686 	"	float grad = 0.5*length(dxdy);\n"
687 	"	float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
688 	"	gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
689 	"}\n\0");
690 
691 	ProgramGLSL * program;
692 	s_margin_copy = program = new ProgramGLSL(
693 	"uniform sampler2DRect tex; uniform vec2 truncate;\n"
694 	"void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");
695 
696 	_param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
697 
698 
699 	GlobalUtil::_OrientationPack2 = 0;
700 	LoadOrientationShader();
701 
702 	if(s_orientation == NULL)
703 	{
704 		//Load a simplified version if the right version is not supported
705 		s_orientation = program =  new ProgramGLSL(
706 		"uniform sampler2DRect tex; uniform sampler2DRect oTex;\n"
707 	"	uniform float size; void main(){\n"
708 	"	vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
709 	"	vec4 oo = texture2DRect(oTex, cc.rg);\n"
710 	"	gl_FragColor.rg = cc.rg;\n"
711 	"	gl_FragColor.b = oo.a;\n"
712 	"	gl_FragColor.a = size;}");
713 
714 		_param_orientation_gtex = glGetUniformLocation(*program, "oTex");
715 		_param_orientation_size = glGetUniformLocation(*program, "size");
716 		GlobalUtil::_MaxOrientation = 0;
717 		GlobalUtil::_FullSupported = 0;
718 		std::cerr<<"Orientation simplified on this hardware"<<endl;
719 	}
720 
721 	if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
722 	if(s_descriptor_fp == NULL)
723 	{
724 		GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;
725 		std::cerr<<"Descriptor ignored on this hardware"<<endl;
726 	}
727 
728 	s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
729 }
730 
731 
LoadDisplayShaders()732 void ShaderBagGLSL::LoadDisplayShaders()
733 {
734 	s_copy_key = new ProgramGLSL(
735 		"uniform sampler2DRect tex; void main(){\n"
736 	"gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0);	}");
737 
738 
739 	ProgramGLSL * program;
740 	s_vertex_list = program = new ProgramGLSL(
741 	"uniform vec4 sizes; uniform sampler2DRect tex;\n"
742 	"void main(void){\n"
743 	"float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"
744 	"float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
745 	"float px = mod(index, twidth);\n"
746 	"vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
747 	"vec4 cc = texture2DRect(tex, tpos );\n"
748 	"float size = 3.0 * cc.a; //sizes.x;// \n"
749 	"gl_FragColor.zw = vec2(0.0, 1.0);\n"
750 	"if(any(lessThan(cc.xy,vec2(0.0))))  {gl_FragColor.xy = cc.xy; }\n"
751 	"else {float type = fract(px);\n"
752 	"vec2 dxy = vec2(0); \n"
753 	"dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
754 	"dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
755 	"float s = sin(cc.b); float c = cos(cc.b); \n"
756 	"gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
757 	"gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");
758 
759 	_param_genvbo_size = glGetUniformLocation(*program, "sizes");
760 
761 	s_display_gaussian =  new ProgramGLSL(
762 	"uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"
763 	"gl_FragColor = vec4(r, r, r, 1);}" );
764 
765 	s_display_dog =  new ProgramGLSL(
766 	"uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"
767 	"gl_FragColor = vec4(g, g, g, 0.0);}" );
768 
769 	s_display_grad = new ProgramGLSL(
770 		"uniform sampler2DRect tex; void main(void){\n"
771     "	vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");
772 
773 	s_display_keys= new ProgramGLSL(
774 		"uniform sampler2DRect tex; void main(void){\n"
775 	"	vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
776 	"	if(cc.r ==0.0) discard; gl_FragColor =  (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");
777 }
778 
LoadKeypointShader(float threshold,float edge_threshold)779 void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold)
780 {
781 	float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
782 	float threshold1 = threshold;
783 	float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
784 	ostringstream out;;
785 	streampos pos;
786 
787 	//tex(X)(Y)
788 	//X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
789 	//Y: (CDU) (CENTER 0, DOWN -1, UP    +1)
790 	if(GlobalUtil::_DarknessAdaption)
791 	{
792 		out <<	"#define THRESHOLD0 (" << threshold0 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
793 				"#define THRESHOLD1 (" << threshold1 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
794 				"#define THRESHOLD2 " << threshold2 << "\n";
795 	}else
796 	{
797 		out <<	"#define THRESHOLD0 " << threshold0 << "\n"
798 				"#define THRESHOLD1 " << threshold1 << "\n"
799 				"#define THRESHOLD2 " << threshold2 << "\n";
800 	}
801 
802 	out<<
803 	"uniform sampler2DRect tex, texU, texD; void main ()\n"
804 	"{\n"
805 	"	vec4 v1, v2, gg, temp;\n"
806 	"	vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
807 	"	vec4 cc  = texture2DRect(tex, gl_TexCoord[0].xy);\n"
808 	"	temp =  texture2DRect(tex, gl_TexCoord[1].xy);\n"
809 	"	v1.x =  temp.g;			gg.x = temp.r;\n"
810 	"	temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"
811 	"	v1.y = temp.g;			gg.y = temp.r;\n"
812 	"	temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"
813 	"	v1.z = temp.g;			gg.z = temp.r;\n"
814 	"	temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"
815 	"	v1.w = temp.g;			gg.w = temp.r;\n"
816 	"	v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"
817 	"	v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"
818 	"	v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"
819 	"	v2.w = texture2DRect(tex, TexRU.xy).g;\n"
820 	"	vec2 dxdy = (gg.yw - gg.xz); \n"
821 	"	float grad = 0.5*length(dxdy);\n"
822 	"	float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
823 	"	gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
824 
825 	//test against 8 neighbours
826 	//use variable to identify type of extremum
827 	//1.0 for local maximum and 0.5 for minimum
828 	<<
829 	"	float dog = 0.0; \n"
830 	"	gl_FragData[1] = vec4(0, 0, 0, 0); \n"
831 	"	dog = cc.g > float(THRESHOLD0) && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"
832 	"	dog = cc.g < float(-THRESHOLD0) && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"
833 	"	if(dog == 0.0) return;\n";
834 
835 	pos = out.tellp();
836 	//do edge supression first..
837 	//vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
838 	//vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
839 
840 	out<<
841 	"	float fxx, fyy, fxy; \n"
842 	"	vec4 D2 = v1.xyzw - cc.gggg;\n"
843 	"	vec2 D4 = v2.xw - v2.yz;\n"
844 	"	fxx = D2.x + D2.y;\n"
845 	"	fyy = D2.z + D2.w;\n"
846 	"	fxy = 0.25*(D4.x + D4.y);\n"
847 	"	float fxx_plus_fyy = fxx + fyy;\n"
848 	"	float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
849 	"	float score_down = (fxx*fyy - fxy*fxy);\n"
850 	"	if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)return;\n";
851 
852 	//...
853 	out<<" \n"
854 	"	vec2 D5 = 0.5*(v1.yw-v1.xz); \n"
855 	"	float fx = D5.x, fy = D5.y ; \n"
856 	"	float fs, fss , fxs, fys ; \n"
857 	"	vec2 v3; vec4 v4, v5, v6;\n"
858 	//read 9 pixels of upper level
859 	<<
860 	"	v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"
861 	"	v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"
862 	"	v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"
863 	"	v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"
864 	"	v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"
865 	"	v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"
866 	"	v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"
867 	"	v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"
868 	"	v6.w = texture2DRect(texU, TexRU.xy).g;\n"
869 	//compare with 9 pixels of upper level
870 	//read and compare with 9 pixels of lower level
871 	//the maximum case
872 	<<
873 	"	if(dog == 1.0)\n"
874 	"	{\n"
875 	"		if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return; \n"
876 	"		v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
877 	"		v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
878 	"		v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
879 	"		v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
880 	"		v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
881 	"		v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
882 	"		v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
883 	"		v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
884 	"		v6.w = texture2DRect(texD, TexRU.xy).g;\n"
885 	"		if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return; \n"
886 	"	}\n"
887 	//the minimum case
888 	<<
889 	"	else{\n"
890 	"	if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return; \n"
891 	"		v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
892 	"		v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
893 	"		v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
894 	"		v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
895 	"		v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
896 	"		v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
897 	"		v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
898 	"		v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
899 	"		v6.w = texture2DRect(texD, TexRU.xy).g;\n"
900 	"		if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return; \n"
901 	"	}\n";
902 
903 	if(GlobalUtil::_SubpixelLocalization)
904 
905 	// sub-pixel localization FragData1 = vec4(dog, 0, 0, 0); return;
906 	out <<
907 	"	fs = 0.5*( v3.x - v3.y );  \n"
908 	"	fss = v3.x + v3.y - cc.g - cc.g;\n"
909 	"	fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
910 	"	fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
911 
912 	//
913 	// let dog difference be quatratic function  of dx, dy, ds;
914 	// df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +
915 	//				  + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
916 	//				  + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
917 	// (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
918 
919 	//the local extremum satisfies
920 	// df/dx = 0, df/dy = 0, df/dz = 0
921 
922 	//that is
923 	// |-fx|     | fxx fxy fxs |   |dx|
924 	// |-fy|  =  | fxy fyy fys | * |dy|
925 	// |-fs|     | fxs fys fss |   |ds|
926 	// need to solve dx, dy, ds
927 
928 	// Use Gauss elimination to solve the linear system
929     <<
930 	"	vec3 dxys = vec3(0.0);		\n"
931 	"	vec4 A0, A1, A2 ;			\n"
932 	"	A0 = vec4(fxx, fxy, fxs, -fx);	\n"
933 	"	A1 = vec4(fxy, fyy, fys, -fy);	\n"
934 	"	A2 = vec4(fxs, fys, fss, -fs);	\n"
935 	"	vec3 x3 = abs(vec3(fxx, fxy, fxs));		\n"
936 	"	float maxa = max(max(x3.x, x3.y), x3.z);	\n"
937 	"	if(maxa >= 1e-10 ) {						\n"
938 	"		if(x3.y ==maxa )							\n"
939 	"		{											\n"
940 	"			vec4 TEMP = A1; A1 = A0; A0 = TEMP;	\n"
941 	"		}else if( x3.z == maxa )					\n"
942 	"		{											\n"
943 	"			vec4 TEMP = A2; A2 = A0; A0 = TEMP;	\n"
944 	"		}											\n"
945 	"		A0 /= A0.x;									\n"
946 	"		A1 -= A1.x * A0;							\n"
947 	"		A2 -= A2.x * A0;							\n"
948 	"		vec2 x2 = abs(vec2(A1.y, A2.y));		\n"
949 	"		if( x2.y > x2.x )							\n"
950 	"		{											\n"
951 	"			vec3 TEMP = A2.yzw;					\n"
952 	"			A2.yzw = A1.yzw;						\n"
953 	"			A1.yzw = TEMP;							\n"
954 	"			x2.x = x2.y;							\n"
955 	"		}											\n"
956 	"		if(x2.x >= 1e-10) {						\n"
957 	"			A1.yzw /= A1.y;								\n"
958 	"			A2.yzw -= A2.y * A1.yzw;					\n"
959 	"			if(abs(A2.z) >= 1e-10) {		\n"
960 	// compute dx, dy, ds:
961 	<<
962 	"				\n"
963 	"				dxys.z = A2.w /A2.z;				    \n"
964 	"				dxys.y = A1.w - dxys.z*A1.z;			    \n"
965 	"				dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;	\n"
966 
967 	//one more threshold which I forgot in versions prior to 286
968 	<<
969 	"				bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= float(THRESHOLD1)) ;\n"
970 	"				if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0.0;\n"
971 	"			}\n"
972 	"		}\n"
973 	"	}\n"
974     //keep the point when the offset is less than 1
975 	<<
976 	"	gl_FragData[1] = vec4( dog, dxys); \n";
977 	else
978 
979 	out<<
980 	"	gl_FragData[1] =  vec4( dog, 0.0, 0.0, 0.0) ;	\n";
981 
982 	out<<
983 	"}\n" <<'\0';
984 
985 
986 
987 	ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
988 	if(program->IsNative())
989 	{
990 		s_keypoint = program ;
991 		//parameter
992 	}else
993 	{
994 		delete program;
995 		out.seekp(pos);
996 		out <<
997 	"	gl_FragData[1] =  vec4(dog, 0.0, 0.0, 0.0) ;	\n"
998 	"}\n" <<'\0';
999 		s_keypoint = program = new ProgramGLSL(out.str().c_str());
1000 		GlobalUtil::_SubpixelLocalization = 0;
1001 		std::cerr<<"Detection simplified on this hardware"<<endl;
1002 	}
1003 
1004 	_param_dog_texu = glGetUniformLocation(*program, "texU");
1005 	_param_dog_texd = glGetUniformLocation(*program, "texD");
1006 }
1007 
1008 
SetDogTexParam(int texU,int texD)1009 void ShaderBagGLSL::SetDogTexParam(int texU, int texD)
1010 {
1011 	glUniform1i(_param_dog_texu, 1);
1012 	glUniform1i(_param_dog_texd, 2);
1013 }
1014 
SetGenListStepParam(int tex,int tex0)1015 void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0)
1016 {
1017 	glUniform1i(_param_genlist_step_tex0, 1);
1018 }
SetGenVBOParam(float width,float fwidth,float size)1019 void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth,  float size)
1020 {
1021 	float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
1022 	glUniform4fv(_param_genvbo_size, 1, sizes);
1023 
1024 }
1025 
1026 
1027 
UnloadProgram()1028 void ShaderBagGLSL::UnloadProgram()
1029 {
1030 	glUseProgram(0);
1031 }
1032 
1033 
1034 
LoadGenListShader(int ndoglev,int nlev)1035 void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev)
1036 {
1037 	ProgramGLSL * program;
1038 
1039 	s_genlist_init_tight = new ProgramGLSL(
1040 	"uniform sampler2DRect tex; void main (void){\n"
1041 	"vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1042 	"texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1043 	"gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
1044 	"}");
1045 
1046 
1047 	s_genlist_init_ex = program = new ProgramGLSL(
1048 	"uniform sampler2DRect tex;uniform vec2 bbox;\n"
1049 	"void main (void ){\n"
1050 	"vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1051 	"texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1052 	"bvec4 helper2 = bvec4( \n"
1053 	"all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0.0,\n"
1054 	"all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0.0,\n"
1055 	"all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0.0,\n"
1056 	"all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0.0);\n"
1057 	"gl_FragColor = vec4(helper2);\n"
1058 	"}");
1059 	_param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1060 
1061 
1062 	//reduction ...
1063 	s_genlist_histo = new ProgramGLSL(
1064 	"uniform sampler2DRect tex; void main (void){\n"
1065 	"vec4 helper; vec4 helper2; \n"
1066 	"helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
1067 	"helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
1068 	"gl_FragColor.rg = helper2.xz + helper2.yw;\n"
1069 	"helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
1070 	"helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
1071 	"gl_FragColor.ba= helper2.xz+helper2.yw;\n"
1072 	"}");
1073 
1074 
1075 	//read of the first part, which generates tex coordinates
1076 	s_genlist_start= program =  LoadGenListStepShader(1, 1);
1077 	_param_ftex_width= glGetUniformLocation(*program, "width");
1078 	_param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
1079 	//stepping
1080 	s_genlist_step = program = LoadGenListStepShader(0, 1);
1081 	_param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
1082 
1083 }
1084 
SetMarginCopyParam(int xmax,int ymax)1085 void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax)
1086 {
1087 	float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
1088 	glUniform2fv(_param_margin_copy_truncate, 1, truncate);
1089 }
1090 
SetGenListInitParam(int w,int h)1091 void ShaderBagGLSL::SetGenListInitParam(int w, int h)
1092 {
1093 	float bbox[2] = {w - 1.0f, h - 1.0f};
1094 	glUniform2fv(_param_genlist_init_bbox, 1, bbox);
1095 }
SetGenListStartParam(float width,int tex0)1096 void ShaderBagGLSL::SetGenListStartParam(float width, int tex0)
1097 {
1098 	glUniform1f(_param_ftex_width, width);
1099 	glUniform1i(_param_genlist_start_tex0, 0);
1100 }
1101 
1102 
LoadGenListStepShader(int start,int step)1103 ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step)
1104 {
1105 	int i;
1106 	// char chanels[5] = "rgba";
1107 	ostringstream out;
1108 
1109 	for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n";
1110 	if(start)
1111 	{
1112 		out<<"uniform float width;\n";
1113 		out<<"void main(void){\n";
1114 		out<<"float  index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";
1115 		out<<"vec2 pos = vec2(0.5, 0.5);\n";
1116 	}else
1117 	{
1118 		out<<"uniform sampler2DRect tex;\n";
1119 		out<<"void main(void){\n";
1120 		out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";
1121 		out<<"vec2 pos = tc.rg; float index = tc.b;\n";
1122 	}
1123 	out<<"vec2 sum; 	vec4 cc;\n";
1124 
1125 
1126 	if(step>0)
1127 	{
1128 		out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
1129 		for(i = 0; i < step; i++)
1130 		{
1131 
1132 			out<<"cc = texture2DRect(tex"<<i<<", pos);\n";
1133 			out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b;  \n";
1134 			out<<"if (index <cc.r){ opos = cpos.xx;}\n";
1135 			out<<"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
1136 			out<<"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
1137 			out<<"else {opos = cpos.yy; index -= sum.y;}\n";
1138 			out<<"pos = (pos + pos + opos);\n";
1139 		}
1140 	}
1141 	out<<"gl_FragColor = vec4(pos, index, 1.0);\n";
1142 	out<<"}\n"<<'\0';
1143 	return new ProgramGLSL(out.str().c_str());
1144 }
1145 
1146 
// Assembles, compiles and installs the orientation-histogram fragment shader.
//
// The GLSL source is built in three parts:
//   1. a header (optional NVIDIA pragmas, #defines taken from GlobalUtil,
//      uniform declarations and the start of main());
//   2. a double loop over a window around the feature position that
//      accumulates weighted gradient magnitudes into 36 ten-degree bins,
//      stored as nine vec4s (bins[0..8]);
//   3. the smoothing / peak-finding / output code appended by
//      WriteOrientationCodeToStream().
//
// On success (program->IsNative()) the program is stored in s_orientation and
// the locations of "gradTex", "size" and "texS" are cached. On failure the
// program is deleted and s_orientation is left untouched.
void ShaderBagGLSL::LoadOrientationShader()
{
	ostringstream out;

	// Compiler hints that are only emitted for NVIDIA hardware.
	if(GlobalUtil::_IsNvidia)
	{
	out <<	"#pragma optionNV(ifcvt none)\n"
			"#pragma optionNV(unroll all)\n";
	}

	// Header: tunables baked in as #defines, uniforms, and the start of main().
	// The texS sampler is declared only when sub-pixel localization or
	// extremum-sign keeping is enabled.
	out<<"\n"
	"#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
	"#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
	"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
	"uniform sampler2DRect tex;					\n"
	"uniform sampler2DRect gradTex;				\n"
	"uniform vec4 size;						\n"
	<< ((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)? "	uniform sampler2DRect texS;	\n" : " ")	<<
	"void main()		\n"
	"{													\n"
	"	vec4 bins[10];								\n"
	"	bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);	\n"
	"	bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);	\n"
	"	bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);	\n"
	"	vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy);	\n"
	"	vec2 pos = loc.xy;		\n"
	"	bool orientation_mode = (size.z != 0.0);			\n"
	"	float sigma = orientation_mode? abs(size.z) : loc.w; \n";
	// Optional refinement: shift the position by the offset stored in texS and
	// rescale sigma; the #if is fed the value of _KeepExtremumSign.
	if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
	{
		out<<
	"	if(orientation_mode){\n"
	"		vec4 offset = texture2DRect(texS, pos);\n"
	"		pos.xy = pos.xy + offset.yz; \n"
	"		sigma = sigma * pow(size.w, offset.w);\n"
	"		#if "<< GlobalUtil::_KeepExtremumSign << "\n"
	"			if(offset.x < 0.6) sigma = -sigma; \n"
	"		#endif\n"
	"	}\n";
	}
	// Early-out when size.z is negative, then set up the sampling window:
	// Gaussian sigma, window half-size, and a bounding box clamped to the image.
	out<<
	"	//bool fixed_orientation = (size.z < 0.0);		\n"
	"	if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
	"	float gsigma = sigma * GAUSSIAN_WF;				\n"
	"	vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))) ;	\n"
	"	vec2 dim = size.xy;							\n"
	"	float dist_threshold = win.x*win.x+0.5;			\n"
	"	float factor = -0.5/(gsigma*gsigma);			\n"
	"	vec4 sz;	vec2 spos;						\n"
	"	//if(any(pos.xy <= 1)) discard;					\n"
	"	sz.xy = max( pos - win, vec2(1,1));			\n"
	"	sz.zw = min( pos + win, dim-vec2(2, 2));				\n"
	"	sz = floor(sz)+0.5;";
	//loop to get the histogram

	// For every sample inside the circular window: read gradient magnitude (b)
	// and angle (a), convert the angle to a ten-degree bin index, and build a
	// one-hot vec4 increment selecting the component inside the target vec4.
	out<<"\n"
	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
	"	{																\n"
	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
	"		{															\n"
	"			vec2 offset = spos - pos;								\n"
	"			float sq_dist = dot(offset,offset);						\n"
	"			if( sq_dist < dist_threshold){							\n"
	"				vec4 cc = texture2DRect(gradTex, spos);				\n"
	"				float grad = cc.b;	float theta = cc.a;				\n"
	"				float idx = floor(degrees(theta)*0.1);				\n"
	"				if(idx < 0.0 ) idx += 36.0;									\n"
	"				float weight = grad*exp(sq_dist * factor);				\n"
	"				float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ;							\n"
	"				vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";

	// Two ways of adding `inc` to the right bins[] entry.
	if(GlobalUtil::_UseDynamicIndexing)
	{
		//dynamic indexing may not be faster
		out<<"\n"
	"				int iidx = int((idx*0.25));	\n"
	"				bins[iidx]+=inc;					\n"
	"			}										\n"
	"		}											\n"
	"	}";

	}else
	{
		//nvfp40 still does not support dynamic array indexing
		//unrolled binary search...
		out<<"\n"
	"				if(idx < 16.0)							\n"
	"				{										\n"
	"					if(idx < 8.0)							\n"
	"					{									\n"
	"						if(idx < 4.0)	{	bins[0]+=inc;}	\n"
	"						else		{	bins[1]+=inc;}	\n"
	"					}else								\n"
	"					{									\n"
	"						if(idx < 12.0){	bins[2]+=inc;}	\n"
	"						else		{	bins[3]+=inc;}	\n"
	"					}									\n"
	"				}else if(idx < 32.0)						\n"
	"				{										\n"
	"					if(idx < 24.0)						\n"
	"					{									\n"
	"						if(idx <20.0)	{	bins[4]+=inc;}	\n"
	"						else		{	bins[5]+=inc;}	\n"
	"					}else								\n"
	"					{									\n"
	"						if(idx < 28.0){	bins[6]+=inc;}	\n"
	"						else		{	bins[7]+=inc;}	\n"
	"					}									\n"
	"				}else 						\n"
	"				{										\n"
	"					bins[8]+=inc;						\n"
	"				}										\n"
	"			}										\n"
	"		}											\n"
	"	}";

	}

	// Append histogram smoothing, peak search, output writes and the closing
	// brace of main().
	WriteOrientationCodeToStream(out);

	ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
	if(program->IsNative())
	{
		s_orientation = program ;
		_param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
		_param_orientation_size = glGetUniformLocation(*program, "size");
		_param_orientation_stex = glGetUniformLocation(*program, "texS");
	}else
	{
		// Not usable on this hardware: discard it and leave s_orientation as it was.
		delete program;
	}
}
1279 
1280 
// Appends the second half of the orientation shader to `out`: histogram
// smoothing, peak detection, the output writes and the closing brace of
// main(), followed by a terminating '\0'.
//
// The emitted code expects the surrounding shader to have declared and filled
// `vec4 bins[10]` (bins[0..8] hold the 36 histogram entries) and to provide
// `pos`, `sigma` and `orientation_mode`.
//
// Code generation is steered by three GlobalUtil settings:
//   _KeepShaderLoop : emit real for-loops instead of manually unrolled code
//   _MaxOrientation : >1 keeps up to 2/3/4 peaks sorted by decreasing weight,
//                     otherwise only the first bin equal to the maximum is used
//   _MulitiOrientationThreshold : reaches the shader via the
//                     ORIENTATION_THRESHOLD macro defined by the caller
void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)
{
	//smooth histogram and find the largest
/*
	smoothing kernel:	 (1 3 6 7 6 3 1 )/27
	the same as 3 pass of (1 1 1)/3 averaging
	maybe better to use 4 pass on the vectors...
*/


	//the inner loop on different array numbers is always unrolled in fp40

	//bug fixed here:)
	// FILTER_CODE(i) replaces bins[i] by bins[i]*m2 plus the contributions of
	// the previous vec4 (prev.yzw*m1) and the next one (bins[i+1].zyx*m1).
	// bins[9] is a copy of bins[0] so that i+1 wraps around; the outer j loop
	// applies the filter twice.
	out<<"\n"
	"	//mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;  \n"
	"	mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0;  \n"
	"	mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
	"	#define FILTER_CODE(i) {						\\\n"
	"			vec4 newb	=	(bins[i]* m2);			\\\n"
	"			newb.xyz	+=	( prev.yzw * m1);		\\\n"
	"			prev = bins[i];							\\\n"
	"			newb.wzy	+=	( bins[i+1].zyx *m1);	\\\n"
	"			bins[i] = newb;}\n"
	"	for (int j=0; j<2; j++)								\n"
	"	{												\n"
	"		vec4 prev  = bins[8];						\n"
	"		bins[9]		 = bins[0];						\n";

	if(GlobalUtil::_KeepShaderLoop)
	{
		out<<
	"		for (int i=0; i<9; i++)							\n"
	"		{												\n"
	"			FILTER_CODE(i);								\n"
	"		}												\n"
	"	}";

	}else
	{
		//manually unroll the loop for ATI.
		out <<
	"	   FILTER_CODE(0);\n"
	"	   FILTER_CODE(1);\n"
	"	   FILTER_CODE(2);\n"
	"	   FILTER_CODE(3);\n"
	"	   FILTER_CODE(4);\n"
	"	   FILTER_CODE(5);\n"
	"	   FILTER_CODE(6);\n"
	"	   FILTER_CODE(7);\n"
	"	   FILTER_CODE(8);\n"
	"	}\n";
	}
	//find the maximum voting
	// maxh ends up holding the largest histogram value in all four components.
	out<<"\n"
	"	vec4 maxh; vec2 maxh2; 	\n"
	"	vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
	"			bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
	"	maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";

	// Two macro fragments spliced into FINDPEAK below:
	//   testpeak_code : opens the per-vec4 block and computes the bvec4 `test`
	//   savepeak_code : records one accepted peak (th, weight)
	std::string testpeak_code;
	std::string savepeak_code;

	//save two/three/four orientations with the largest votings?

	if(GlobalUtil::_MaxOrientation>1)
	{
		out<<"\n"
		"	vec4 Orientations = vec4(0.0, 0.0, 0.0, 0.0);				\n"
		"	vec4 weights = vec4(0.0,0.0,0.0,0.0);		";

		// A bin is a candidate when it exceeds the threshold hh.
		testpeak_code = "\\\n"
		"	{test = greaterThan(bins[i], hh);";

		//save the orientations in weight-decreasing order
		if(GlobalUtil::_MaxOrientation ==2)
		{
		savepeak_code = "\\\n"
		"			if(weight <=weights.g){}\\\n"
		"			else if(weight >weights.r)\\\n"
		"			{weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
		"			else {weights.g = weight; Orientations.g = th;}";
		}else if(GlobalUtil::_MaxOrientation ==3)
		{
		savepeak_code = "\\\n"
		"			if(weight <=weights.b){}\\\n"
		"			else if(weight >weights.r)\\\n"
		"			{weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
		"			else if(weight >weights.g)\\\n"
		"			{weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
		"			else {weights.b = weight; Orientations.b = th;}";
		}else
		{
		savepeak_code = "\\\n"
		"			if(weight <=weights.a){}\\\n"
		"			else if(weight >weights.r)\\\n"
		"			{weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
		"			else if(weight >weights.g)\\\n"
		"			{weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
		"			else if(weight >weights.b)\\\n"
		"			{weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
		"			else {weights.a = weight; Orientations.a = th;}";
		}

	}else
	{
		// Single-orientation mode: only bins equal to the maximum qualify, and
		// the search stops contributing once one peak has been saved.
		out<<"\n"
		"	float Orientation;				";
		testpeak_code ="\\\n"
		"	if(npeaks<=0.0){\\\n"
		"	test = equal(bins[i], maxh)	;";
		savepeak_code="\\\n"
		"			npeaks++;	\\\n"
		"			Orientation = th;";

	}
	//find the peaks
	// FINDPEAK(i, k) examines the four entries of bins[i]; k is the histogram
	// index of bins[i].x. An entry is accepted when it is flagged in `test` and
	// larger than both neighbours (prevb / bins[i+1].x supply the neighbours
	// across vec4 boundaries). The sub-bin offset di is the vertex of the
	// parabola through the entry and its two neighbours. The macro ends by
	// remembering bins[i].w in prevb for the next invocation.
	out <<"\n"
	"	#define FINDPEAK(i, k)"	<<testpeak_code<<"\\\n"
	"	if( any ( test) )							\\\n"
	"	{											\\\n"
	"		if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
	"		    float	th = (k+di+0.5);	float weight = bins[i].x;"
				<<savepeak_code<<"\\\n"
	"		}\\\n"
	"		else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
	"		    float	th = (k+di+1.5);	float weight = bins[i].y;				"
				<<savepeak_code<<"	\\\n"
	"		}\\\n"
	"		if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
	"		    float	th = (k+di+2.5);	float weight = bins[i].z;				"
				<<savepeak_code<<"	\\\n"
	"		}\\\n"
	"		else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
	"		    float	th = (k+di+3.5);	float weight = bins[i].w;				"
				<<savepeak_code<<"	\\\n"
	"		}\\\n"
	"	}}\\\n"
	"	prevb = bins[i].w;";
	//the following loop will be unrolled anyway in fp40,
	//taking more than 1000 instructions..
	//....
	if(GlobalUtil::_KeepShaderLoop)
	{
	out<<"\n"
	"	vec4 hh = maxh * ORIENTATION_THRESHOLD;	bvec4 test;	\n"
	"	bins[9] = bins[0];								\n"
	"	float npeaks = 0.0, k = 0.0;						\n"
	"	float prevb	= bins[8].w;						\n"
	"	for (int i = 0; i < 9; i++)						\n"
	"	{\n"
	"		FINDPEAK(i, k);\n"
	"		k = k + 4.0;	\n"
	"	}";
	}else
	{
		//loop unroll for ATI.
	out <<"\n"
	"	vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
	"	bins[9] = bins[0];								\n"
	"	float npeaks = 0.0;								\n"
	"	float prevb	= bins[8].w;						\n"
	"	FINDPEAK(0, 0.0);\n"
	"	FINDPEAK(1, 4.0);\n"
	"	FINDPEAK(2, 8.0);\n"
	"	FINDPEAK(3, 12.0);\n"
	"	FINDPEAK(4, 16.0);\n"
	"	FINDPEAK(5, 20.0);\n"
	"	FINDPEAK(6, 24.0);\n"
	"	FINDPEAK(7, 28.0);\n"
	"	FINDPEAK(8, 32.0);\n";
	}
	//WRITE output
	// Bin positions are converted to radians (one bin = 10 degrees).
	// Multi-orientation mode writes the peak count (weights above hh, masked to
	// the first _MaxOrientation components) into FragData[0].z and all
	// orientations into FragData[1]; otherwise a single angle goes in FragData[0].z.
	if(GlobalUtil::_MaxOrientation>1)
	{
	out<<"\n"
	"	if(orientation_mode){\n"
	"		npeaks = dot(vec4(1,1,"
			<<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
			<<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n"
	"		gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
	"		gl_FragData[1] = radians((Orientations )*10.0);\n"
	"	}else{\n"
	"		gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
	"	}\n";
	}else
	{
	out<<"\n"
	"	 gl_FragData[0] = vec4(pos, radians((Orientation)*10.0), sigma);\n";
	}
	//end
	out<<"\n"
	"}\n"<<'\0';


}
1484 
SetSimpleOrientationInput(int oTex,float sigma,float sigma_step)1485 void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
1486 {
1487 	glUniform1i(_param_orientation_gtex, 1);
1488 	glUniform1f(_param_orientation_size, sigma);
1489 }
1490 
1491 
1492 
1493 
SetFeatureOrientationParam(int gtex,int width,int height,float sigma,int stex,float step)1494 void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
1495 {
1496 	///
1497 	glUniform1i(_param_orientation_gtex, 1);
1498 
1499 	if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
1500 	{
1501 		//specify texutre for subpixel subscale localization
1502 		glUniform1i(_param_orientation_stex, 2);
1503 	}
1504 
1505 	float size[4];
1506 	size[0] = (float)width;
1507 	size[1] = (float)height;
1508 	size[2] = sigma;
1509 	size[3] = step;
1510 	glUniform4fv(_param_orientation_size, 1, size);
1511 }
1512 
1513 
// Assembles, compiles and installs the descriptor fragment shader.
//
// Each fragment of the generated shader:
//   * derives a feature index and a sub-block index idx (0..15) from its
//     texture coordinate and the dsize uniform;
//   * fetches the feature (position, angle, sigma) from `tex`; features closer
//     than one pixel to the image border produce zero output;
//   * scans a bounding box around the rotated sub-block in `gradTex` and
//     accumulates weighted gradient magnitudes into eight orientation bins,
//     split between DA (bins 0-3) and DB (bins 4-7), written to
//     gl_FragData[0] and gl_FragData[1].
//
// On success the program is stored in s_descriptor_fp and the locations of
// "gradTex", "size" and "dsize" are cached; otherwise the program is deleted
// and s_descriptor_fp is left untouched.
void ShaderBagGLSL::LoadDescriptorShaderF2()
{
	//one shader output 128/8 = 16 , each fragout encodes 4
	//const double twopi = 2.0*3.14159265358979323846;
	//const double rpi  = 8.0/twopi;
	ostringstream out;
	out<<setprecision(8);

	// Header, index decoding and feature fetch. RPI is 8/(2*pi): it converts an
	// angle in radians into units of the eight orientation bins.
	out<<"\n"
	"#define M_PI 3.14159265358979323846\n"
	"#define TWO_PI (2.0*M_PI)\n"
	"#define RPI 1.2732395447351626861510701069801\n"
	"#define WF  size.z\n"
	"uniform sampler2DRect tex;				\n"
	"uniform sampler2DRect gradTex;			\n"
	"uniform vec4 dsize;						\n"
	"uniform vec3 size;						\n"
	"void main()		\n"
	"{\n"
	"	vec2 dim	= size.xy;	//image size			\n"
	"	float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
	"	float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5));		\n"
	"	index = floor(index*0.125) + 0.49;  \n"
	"	vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
	"	vec2 pos = texture2DRect(tex, coord).xy;		\n"
	"	if(any(lessThanEqual(pos.xy,  vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard;	\n"
	"	{ gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
	"	float  anglef = texture2DRect(tex, coord).z;\n"
	"	if(anglef > M_PI) anglef -= TWO_PI;\n"
	"	float sigma = texture2DRect(tex, coord).w; \n"
	"	float spt  = abs(sigma * WF);	//default to be 3*sigma	\n";

	//rotation
	out<<
	"	vec4 cscs, rots;								\n"
	"	cscs.y = sin(anglef);	cscs.x = cos(anglef);	\n"
	"	cscs.zw = - cscs.xy;							\n"
	"	rots = cscs /spt;								\n"
	"	cscs *= spt; \n";

	//here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
	//and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
	//divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
	//To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side

	// Centre of this fragment's sub-block (offsetpt, in grid units) and its
	// rotated, scaled position in the image (pt).
	out<<
	"vec4 temp; vec2 pt, offsetpt;				\n"
	"	/*the fraction part of idx is .5*/			\n"
	"	offsetpt.x = 4.0* fract(idx*0.25) - 2.0;				\n"
	"	offsetpt.y = floor(idx*0.25) - 1.5;			\n"
	"	temp = cscs.xwyx*offsetpt.xyxy;				\n"
	"	pt = pos + temp.xz + temp.yw;				\n";

	//get a horizontal bounding box of the rotated rectangle
	out<<
	"	vec2 bwin = abs(cscs.xy);					\n"
	"	float bsz = bwin.x + bwin.y;					\n"
	"	vec4 sz;					\n"
	"	sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
	"	sz.zw = min(pt + vec2(bsz), dim - vec2(2, 2));		\n"
	"	sz = floor(sz)+0.5;"; //move sample point to pixel center
	//get voting for two box

	// For every sample whose rotated offset nxy lies inside the 2x2 block:
	// weight = bilinear spatial weight * gradient magnitude * Gaussian falloff,
	// split linearly between orientation bin theta1 and the following bin
	// (wrapping from 7 back to 0).
	out<<"\n"
	"	vec4 DA, DB; vec2 spos;			\n"
	"	DA = DB  = vec4(0.0, 0.0, 0.0, 0.0);		\n"
	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
	"	{																\n"
	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
	"		{															\n"
	"			vec2 diff = spos - pt;								\n"
	"			temp = rots.xywx * diff.xyxy;\n"
	"			vec2 nxy = (temp.xz + temp.yw); \n"
	"			vec2 nxyn = abs(nxy);			\n"
	"			if(all( lessThan(nxyn, vec2(1.0)) ))\n"
	"			{\n"
	"				vec4 cc = texture2DRect(gradTex, spos);						\n"
	"				float mod = cc.b;	float angle = cc.a;					\n"
	"				float theta0 = RPI * (anglef - angle);				\n"
	"				float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
	"				diff = nxy + offsetpt.xy;								\n"
	"				float ww = exp(-0.125*dot(diff, diff));\n"
	"				vec2 weights = vec2(1) - nxyn;\n"
	"				float weight = weights.x * weights.y *mod*ww; \n"
	"				float theta1 = floor(theta); \n"
	"				float weight2 = (theta - theta1) * weight;\n"
	"				float weight1 = weight - weight2;\n"
	"				DA += vec4(equal(vec4(theta1),  vec4(0, 1, 2, 3)))*weight1;\n"
	"				DA += vec4(equal(vec4(theta1),  vec4(7, 0, 1, 2)))*weight2; \n"
	"				DB += vec4(equal(vec4(theta1),  vec4(4, 5, 6, 7)))*weight1;\n"
	"				DB += vec4(equal(vec4(theta1),  vec4(3, 4, 5, 6)))*weight2; \n"
	"			}\n"
	"		}\n"
	"	}\n";

	out<<
	"	 gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
	"}\n"<<'\0';

	ProgramGLSL * program =  new ProgramGLSL(out.str().c_str());

	if(program->IsNative())
	{
		s_descriptor_fp = program ;
		_param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
		_param_descriptor_size = glGetUniformLocation(*program, "size");
		_param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
	}else
	{
		// Not usable on this hardware: discard it and leave s_descriptor_fp as it was.
		delete program;
	}


}
1628 
LoadDescriptorShader()1629 void ShaderBagGLSL::LoadDescriptorShader()
1630 {
1631 	GlobalUtil::_DescriptorPPT = 16;
1632 	LoadDescriptorShaderF2();
1633 }
1634 
1635 
SetFeatureDescirptorParam(int gtex,int otex,float dwidth,float fwidth,float width,float height,float sigma)1636 void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
1637 {
1638 	///
1639 	glUniform1i(_param_descriptor_gtex, 1);
1640 
1641 	float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
1642 	glUniform4fv(_param_descriptor_dsize, 1, dsize);
1643 	float size[3];
1644 	size[0] = width;
1645 	size[1] = height;
1646 	size[2] = GlobalUtil::_DescriptorWindowFactor;
1647 	glUniform3fv(_param_descriptor_size, 1, size);
1648 
1649 }
1650 
1651 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1652 
LoadFixedShaders()1653 void ShaderBagPKSL::LoadFixedShaders()
1654 {
1655 	ProgramGLSL * program;
1656 
1657 
1658 	s_gray = new ProgramGLSL(
1659 	"uniform sampler2DRect tex; void main(){\n"
1660 	"float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
1661 	"gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}"	);
1662 
1663 
1664 	s_sampling = new ProgramGLSL(
1665 	"uniform sampler2DRect tex; void main(){\n"
1666 	"gl_FragColor= vec4(	texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
1667 	"						texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}"	);
1668 
1669 
1670 	s_margin_copy = program = new ProgramGLSL(
1671 	"uniform sampler2DRect tex;  uniform vec4 truncate; void main(){\n"
1672 	"vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
1673 	"bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
1674 	"if(ob.y) { gl_FragColor = (truncate.z ==0.0 ? cc.rrbb : cc.ggaa); } \n"
1675 	"else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
1676 	"else {	vec4 weights = vec4(vec4(0.0, 1.0, 2.0, 3.0) == truncate.wwww);\n"
1677 	"float v = dot(weights, cc); gl_FragColor = vec4(v);}}");
1678 
1679 	_param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
1680 
1681 
1682 
1683 	s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
1684 
1685 
1686 
1687 	s_grad_pass = program = new ProgramGLSL(
1688 	"uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
1689 	"{\n"
1690 	"	vec4 v1, v2, gg;\n"
1691 	"	vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1692 	"	vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
1693 	"	gl_FragData[0] = cc - cp; \n"
1694 	"	vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
1695 	"	vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
1696 	"	vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
1697 	"	vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
1698 	"	vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
1699 	"	gl_FragData[1] = grad;\n"
1700 	"	vec4 invalid = vec4(equal(grad, vec4(0.0)));	\n"
1701 	"	vec4 ov = atan(dy, dx + invalid);		\n"
1702 	"	gl_FragData[2] = ov; \n"
1703 	"}\n\0"); //when
1704 
1705 	_param_grad_pass_texp = glGetUniformLocation(*program, "texp");
1706 
1707 
1708 	GlobalUtil::_OrientationPack2 = 0;
1709 	LoadOrientationShader();
1710 
1711 	if(s_orientation == NULL)
1712 	{
1713 		//Load a simplified version if the right version is not supported
1714 		s_orientation = program =  new ProgramGLSL(
1715 		"uniform sampler2DRect tex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
1716 		"	vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1717 		"	vec2 co = cc.xy * 0.5; \n"
1718 		"	vec4 oo = texture2DRect(oTex, co);\n"
1719 		"	bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
1720 		"	float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
1721 		"	gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");
1722 
1723 		_param_orientation_gtex= glGetUniformLocation(*program, "oTex");
1724 		_param_orientation_size= glGetUniformLocation(*program, "size");
1725 		GlobalUtil::_MaxOrientation = 0;
1726 		GlobalUtil::_FullSupported = 0;
1727 		std::cerr<<"Orientation simplified on this hardware"<<endl;
1728 	}
1729 
1730 	if(GlobalUtil::_DescriptorPPT)
1731 	{
1732 		LoadDescriptorShader();
1733 		if(s_descriptor_fp == NULL)
1734 		{
1735 			GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;
1736 			std::cerr<<"Descriptor ignored on this hardware"<<endl;
1737 		}
1738 	}
1739 }
1740 
1741 
LoadDisplayShaders()1742 void ShaderBagPKSL::LoadDisplayShaders()
1743 {
1744 	ProgramGLSL * program;
1745 
1746 	s_copy_key = new ProgramGLSL(
1747 	"uniform sampler2DRect tex;void main(){\n"
1748 	"gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");
1749 
1750 	//shader used to write a vertex buffer object
1751 	//which is used to draw the quads of each feature
1752 	s_vertex_list = program = new ProgramGLSL(
1753 	"uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
1754 	"float fwidth = sizes.y; \n"
1755 	"float twidth = sizes.z; \n"
1756 	"float rwidth = sizes.w; \n"
1757 	"float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
1758 	"float px = mod(index, twidth);\n"
1759 	"vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
1760 	"vec4 cc = texture2DRect(tex, tpos );\n"
1761 	"float size = 3.0 * cc.a; \n"
1762 	"gl_FragColor.zw = vec2(0.0, 1.0);\n"
1763 	"if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
1764 	"{\n"
1765 	"	float type = fract(px);\n"
1766 	"	vec2 dxy; float s, c;\n"
1767 	"	dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
1768 	"	dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
1769 	"	s = sin(cc.b); c = cos(cc.b); \n"
1770 	"	gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
1771 	"	gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
1772 	"}\n\0");
1773 	/*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/
1774 
1775 	_param_genvbo_size = glGetUniformLocation(*program, "sizes");
1776 
1777 	s_display_gaussian = new ProgramGLSL(
1778 	"uniform sampler2DRect tex; void main(){\n"
1779     "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy);	bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1780     "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");
1781 
1782 	s_display_dog =  new ProgramGLSL(
1783 	"uniform sampler2DRect tex; void main(){\n"
1784 	"vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1785 	"float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
1786 	"gl_FragColor = vec4(g, g, g, 1.0);}" );
1787 
1788 
1789 	s_display_grad = new ProgramGLSL(
1790 	"uniform sampler2DRect tex; void main(){\n"
1791 	"vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1792 	"float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");
1793 
1794 	s_display_keys= new ProgramGLSL(
1795 	"uniform sampler2DRect tex; void main(){\n"
1796 	"vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1797 	"vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1798 	"bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"
1799 	"float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
1800 	"if(v == 0.0) discard;	\n"
1801 	"else if(oc.r > 0.0) gl_FragColor = vec4(1.0, 0.0, 0,1.0); \n"
1802 	"else gl_FragColor = vec4(0.0,1.0,0.0,1.0);	}" );
1803 }
1804 
LoadOrientationShader(void)1805 void ShaderBagPKSL::LoadOrientationShader(void)
1806 {
1807 	ostringstream out;
1808 	if(GlobalUtil::_IsNvidia)
1809 	{
1810 		out <<	"#pragma optionNV(ifcvt none)\n"
1811 				"#pragma optionNV(unroll all)\n";
1812 	}
1813 	out<<"\n"
1814 	"#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
1815 	"#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1816 	"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1817 	"uniform sampler2DRect tex;	uniform sampler2DRect gtex;\n"
1818 	"uniform sampler2DRect otex; uniform vec4 size;\n"
1819 	"void main()		\n"
1820 	"{													\n"
1821 	"	vec4 bins[10];								\n"
1822 	"	bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);	\n"
1823 	"	bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);	\n"
1824 	"	bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);	\n"
1825 	"	vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy);	\n"
1826 	"	vec2 pos = sift.xy; \n"
1827 	"	bool orientation_mode = (size.z != 0.0);		\n"
1828 	"	float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
1829 	"	//bool fixed_orientation = (size.z < 0.0);		\n"
1830 	"	if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
1831 	"	float gsigma = sigma * GAUSSIAN_WF;				\n"
1832 	"	vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF)));	\n"
1833 	"	vec2 dim = size.xy;							\n"
1834 	"	vec4 dist_threshold = vec4(win.x*win.x+0.5);			\n"
1835 	"	float factor = -0.5/(gsigma*gsigma);			\n"
1836 	"	vec4 sz;	vec2 spos;						\n"
1837 	"	//if(any(pos.xy <= float(1))) discard;					\n"
1838 	"	sz.xy = max( pos - win, vec2(2.0,2.0));			\n"
1839 	"	sz.zw = min( pos + win, dim-vec2(3.0));				\n"
1840 	"	sz = floor(sz*0.5) + 0.5; ";
1841 		//loop to get the histogram
1842 
1843 	out<<"\n"
1844 	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
1845 	"	{																\n"
1846 	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
1847 	"		{															\n"
1848 	"			vec2 offset = 2.0 * spos - pos - vec2(0.5);					\n"
1849 	"			vec4 off = vec4(offset, offset + vec2(1));				\n"
1850 	"			vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww;	\n"
1851 	"			bvec4 inside = lessThan(distsq, dist_threshold);			\n"
1852 	"			if(any(inside))										\n"
1853 	"			{														\n"
1854 	"				vec4 gg = texture2DRect(gtex, spos);				\n"
1855 	"				vec4 oo = texture2DRect(otex, spos);				\n"
1856 	"				vec4 weight = gg * exp(distsq * factor);			\n"
1857 	"				vec4 idxv  = floor(degrees(oo)*0.1); 				\n"
1858 	"				idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0); 			\n"
1859 	"				vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0);	\n";
1860 	//
1861 	if(GlobalUtil::_UseDynamicIndexing)
1862 	{
1863 		// it might be slow on some GPUs
1864 		out<<"\n"
1865 	"				for(int i = 0 ; i < 4; i++)\n"
1866 	"				{\n"
1867 	"					if(inside[i])\n"
1868 	"					{\n"
1869 	"						float idx = idxv[i];								\n"
1870 	"						vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0.0,1.0,2.0,3.0)));	\n"
1871 	"						int iidx = int(floor(idx*0.25));	\n"
1872 	"						bins[iidx]+=inc;					\n"
1873 	"					}										\n"
1874 	"				}											\n"
1875 	"			}												\n"
1876 	"		}													\n"
1877 	"	}";
1878 
1879 	}else
1880 	{
1881 		//nvfp40 still does not support dynamic array indexing
1882 		//unrolled binary search
1883 		//it seems to be faster than the dyanmic indexing version on some GPUs
1884 		out<<"\n"
1885 	"				for(int i = 0 ; i < 4; i++)\n"
1886 	"				{\n"
1887 	"					if(inside[i])\n"
1888 	"					{\n"
1889 	"						float idx = idxv[i]; 										\n"
1890 	"						vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3)));	\n"
1891 	"						if(idx < 16.0)							\n"
1892 	"						{										\n"
1893 	"							if(idx < 8.0)							\n"
1894 	"							{									\n"
1895 	"								if(idx < 4.0)	{	bins[0]+=inc;}	\n"
1896 	"								else		{	bins[1]+=inc;}	\n"
1897 	"							}else								\n"
1898 	"							{									\n"
1899 	"								if(idx < 12.0){	bins[2]+=inc;}	\n"
1900 	"								else		{	bins[3]+=inc;}	\n"
1901 	"							}									\n"
1902 	"						}else if(idx < 32.0)						\n"
1903 	"						{										\n"
1904 	"							if(idx < 24.0)						\n"
1905 	"							{									\n"
1906 	"								if(idx <20.0)	{	bins[4]+=inc;}	\n"
1907 	"								else		{	bins[5]+=inc;}	\n"
1908 	"							}else								\n"
1909 	"							{									\n"
1910 	"								if(idx < 28.0){	bins[6]+=inc;}	\n"
1911 	"								else		{	bins[7]+=inc;}	\n"
1912 	"							}									\n"
1913 	"						}else 						\n"
1914 	"						{										\n"
1915 	"							bins[8]+=inc;						\n"
1916 	"						}										\n"
1917 	"					}											\n"
1918 	"				}												\n"
1919 	"			}										\n"
1920 	"		}											\n"
1921 	"	}";
1922 
1923 	}
1924 
1925 	//reuse the code from the unpacked version..
1926 	ShaderBagGLSL::WriteOrientationCodeToStream(out);
1927 
1928 
1929 
1930 	ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1931 	if(program->IsNative())
1932 	{
1933 		s_orientation = program ;
1934 		_param_orientation_gtex = glGetUniformLocation(*program, "gtex");
1935 		_param_orientation_otex = glGetUniformLocation(*program, "otex");
1936 		_param_orientation_size = glGetUniformLocation(*program, "size");
1937 	}else
1938 	{
1939 		delete program;
1940 	}
1941 }
1942 
SetGenListStartParam(float width,int tex0)1943 void ShaderBagPKSL::SetGenListStartParam(float width, int tex0)
1944 {
1945 	glUniform1f(_param_ftex_width, width);
1946 	glUniform1i(_param_genlist_start_tex0, 0);
1947 }
1948 
LoadGenListShader(int ndoglev,int nlev)1949 void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev)
1950 {
1951 	ProgramGLSL * program;
1952 
1953 	s_genlist_init_tight = new ProgramGLSL(
1954 	"uniform sampler2DRect tex; void main ()\n"
1955 	"{\n"
1956 	"	vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"
1957 	"					texture2DRect(tex, gl_TexCoord[1].xy).r, \n"
1958 	"					texture2DRect(tex, gl_TexCoord[2].xy).r, \n"
1959 	"					texture2DRect(tex, gl_TexCoord[3].xy).r); \n"
1960 	"					gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"
1961 	"}");
1962 
1963 	s_genlist_init_ex = program = new ProgramGLSL(
1964 	"uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"
1965 	"{\n"
1966 	"	vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1967 	"	vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1968 	"	vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1969 	"	vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1970 	"	vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"
1971 	"	vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"
1972 	"	vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"
1973 	"	vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"
1974 	"	helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"
1975 	"	helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"
1976 	"	helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"
1977 	"	helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"
1978 	"	gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0))));	\n"
1979 	"	gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0))));	\n"
1980 	"	gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0))));	\n"
1981 	"	gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0))));	\n"
1982 	"}");
1983 	_param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1984 
1985 	s_genlist_end = program = new ProgramGLSL(
1986 		GlobalUtil::_KeepExtremumSign == 0 ?
1987 
1988 	"uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
1989 	"{\n"
1990 	"	vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
1991 	"	vec2 pos = tc.rg; float index = tc.b;\n"
1992 	"	vec4 tk = texture2DRect( ktex, pos); \n"
1993 	"	vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1994 	"	vec2 opos; \n"
1995 	"	opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
1996 	"	opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
1997 	"	gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"
1998 	"}" :
1999 
2000 	"uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
2001 	"{\n"
2002 	"	vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
2003 	"	vec2 pos = tc.rg; float index = tc.b;\n"
2004 	"	vec4 tk = texture2DRect( ktex, pos); \n"
2005 	"	vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))) \n"
2006 	"	vec2 opos; \n"
2007 	"	opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
2008 	"	opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
2009 	"	gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"
2010 	"}"
2011 	);
2012 
2013 	_param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");
2014 
2015 	//reduction ...
2016 	s_genlist_histo = new ProgramGLSL(
2017 	"uniform sampler2DRect tex; void main ()\n"
2018 	"{\n"
2019 	"	vec4 helper; vec4 helper2; \n"
2020 	"	helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
2021 	"	helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
2022 	"	gl_FragColor.rg = helper2.xz + helper2.yw;\n"
2023 	"	helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
2024 	"	helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
2025 	"	gl_FragColor.ba= helper2.xz+helper2.yw;\n"
2026 	"}");
2027 
2028 
2029 	//read of the first part, which generates tex coordinates
2030 
2031 	s_genlist_start= program =  ShaderBagGLSL::LoadGenListStepShader(1, 1);
2032 	_param_ftex_width= glGetUniformLocation(*program, "width");
2033 	_param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
2034 	//stepping
2035 	s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1);
2036 	_param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
2037 
2038 }
UnloadProgram(void)2039 void ShaderBagPKSL::UnloadProgram(void)
2040 {
2041 	glUseProgram(0);
2042 }
LoadKeypointShader(float dog_threshold,float edge_threshold)2043 void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold)
2044 {
2045 	float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
2046 	float threshold1 = dog_threshold;
2047 	float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
2048 	ostringstream out;;
2049 	out<<setprecision(8);
2050 
2051 	if(GlobalUtil::_IsNvidia)
2052 	{
2053 		out << "#pragma optionNV(ifcvt none)\n"
2054 				"#pragma optionNV(unroll all)\n";
2055 
2056 	}
2057 	if(GlobalUtil::_KeepShaderLoop)
2058 	{
2059 		out <<  "#define REPEAT4(FUNCTION)\\\n"
2060 				"for(int i = 0; i < 4; ++i)\\\n"
2061 				"{\\\n"
2062 				"	FUNCTION(i);\\\n"
2063 				"}\n";
2064 	}else
2065 	{
2066 		//loop unroll
2067 		out <<  "#define REPEAT4(FUNCTION)\\\n"
2068 				"FUNCTION(0);\\\n"
2069 				"FUNCTION(1);\\\n"
2070 				"FUNCTION(2);\\\n"
2071 				"FUNCTION(3);\n";
2072 	}
2073 	//tex(X)(Y)
2074 	//X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
2075 	//Y: (CDU) (CENTER 0, DOWN -1, UP    +1)
2076 
2077 	if(GlobalUtil::_DarknessAdaption)
2078 	{
2079 		out <<	"#define THRESHOLD0(i) (" << threshold0 << "* ii[i])\n"
2080 				"#define THRESHOLD1 (" << threshold1 << "* ii[0])\n"
2081 				"#define THRESHOLD2 " << threshold2 << "\n"
2082 				"#define DEFINE_EXTRA() vec4 ii = texture2DRect(texI, gl_TexCoord[0].xy); "
2083 				"ii = min(2.0 * ii + 0.1, 1.0) \n"
2084 				"#define MOVE_EXTRA(idx)	ii[0] = ii[idx]\n";
2085 		out << "uniform sampler2DRect texI;\n";
2086 	}else
2087 	{
2088 		out <<	"#define THRESHOLD0(i) " << threshold0 << "\n"
2089 				"#define THRESHOLD1 " << threshold1 << "\n"
2090 				"#define THRESHOLD2 " << threshold2 << "\n"
2091 				"#define DEFINE_EXTRA()\n"
2092 				"#define MOVE_EXTRA(idx) \n"	;
2093 	}
2094 
2095 	out<<
2096 	"uniform sampler2DRect tex; uniform sampler2DRect texU;\n"
2097 	"uniform sampler2DRect texD; void main ()\n"
2098 	"{\n"
2099 	"	vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
2100 	"	vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
2101 	"	vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"
2102 	"	vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"
2103 	"	vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"
2104 	"	vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
2105 	"	vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"
2106 	"	vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"
2107 	"	vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"
2108 	"	vec4 cru = texture2DRect(tex, TexRU.xy);\n"
2109 	"	vec4  cc = ccc;\n"
2110 	"	vec4  v1[4], v2[4];\n"
2111 	"	v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2112 	"	v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2113 	"	v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2114 	"	v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2115 	"	v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2116 	"	v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2117 	"	v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2118 	"	v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2119 	"	DEFINE_EXTRA();\n";
2120 
2121 	//test against 8 neighbours
2122 	//use variable to identify type of extremum
2123 	//1.0 for local maximum and -1.0 for minimum
2124 	out <<
2125 	"	vec4 key = vec4(0.0); \n"
2126 	"	#define KEYTEST_STEP0(i) \\\n"
2127 	"	{\\\n"
2128 	"		bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"
2129 	"		key[i] = cc[i] > float(THRESHOLD0(i)) && all(test1)?1.0: 0.0;\\\n"
2130 	"		key[i] = cc[i] < float(-THRESHOLD0(i)) && all(test2)? -1.0: key[i];\\\n"
2131 	"	}\n"
2132 	"	REPEAT4(KEYTEST_STEP0);\n"
2133 	"	if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"
2134 	"	if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"
2135 	"	gl_FragColor = vec4(0.0);\n"
2136 	"	if(any(notEqual(key, vec4(0.0)))) {\n";
2137 
2138 	//do edge supression first..
2139 	//vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
2140 	//vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
2141 
2142 	out<<
2143 	"	float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
2144 	"	#define EDGE_SUPPRESION(i) \\\n"
2145 	"	if(key[i] != 0.0)\\\n"
2146 	"	{\\\n"
2147 	"		vec4 D2 = v1[i].xyzw - cc[i];\\\n"
2148 	"		vec2 D4 = v2[i].xw - v2[i].yz;\\\n"
2149 	"		vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \\\n"
2150 	"		fx[i] = D5.x;	fy[i] = D5.y ;\\\n"
2151 	"		fxx[i] = D2.x + D2.y;\\\n"
2152 	"		fyy[i] = D2.z + D2.w;\\\n"
2153 	"		fxy[i] = 0.25*(D4.x + D4.y);\\\n"
2154 	"		float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"
2155 	"		float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"
2156 	"		float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\\\n"
2157 	"		if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)key[i] = 0.0;\\\n"
2158 	"	}\n"
2159 	"	REPEAT4(EDGE_SUPPRESION);\n"
2160 	"	if(any(notEqual(key, vec4(0.0)))) {\n";
2161 
2162 	////////////////////////////////////////////////
2163 	//read 9 pixels of upper/lower level
2164 	out<<
2165 	"	vec4  v4[4], v5[4], v6[4];\n"
2166 	"	ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"
2167 	"	clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"
2168 	"	crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"
2169 	"	ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"
2170 	"	ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"
2171 	"	cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"
2172 	"	clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"
2173 	"	crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"
2174 	"	cru = texture2DRect(texU, TexRU.xy);\n"
2175 	"	vec4 cu = ccc;\n"
2176 	"	v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2177 	"	v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2178 	"	v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2179 	"	v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2180 	"	v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2181 	"	v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2182 	"	v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2183 	"	v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2184 	<<
2185 	"	#define KEYTEST_STEP1(i)\\\n"
2186 	"	if(key[i] == 1.0)\\\n"
2187 	"	{\\\n"
2188 	"		bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"
2189 	"		if(cc[i] < cu[i] || any(test))key[i] = 0.0; \\\n"
2190 	"	}else if(key[i] == -1.0)\\\n"
2191 	"	{\\\n"
2192 	"		bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"
2193 	"		if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \\\n"
2194 	"	}\n"
2195 	"	REPEAT4(KEYTEST_STEP1);\n"
2196 	"	if(any(notEqual(key, vec4(0.0)))) { \n"
2197 	<<
2198 	"	ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"
2199 	"	clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"
2200 	"	crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"
2201 	"	ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"
2202 	"	ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"
2203 	"	cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"
2204 	"	clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"
2205 	"	crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"
2206 	"	cru = texture2DRect(texD, TexRU.xy);\n"
2207 	"	vec4 cd = ccc;\n"
2208 	"	v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2209 	"	v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2210 	"	v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2211 	"	v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2212 	"	v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2213 	"	v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2214 	"	v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2215 	"	v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2216 	<<
2217 	"	#define KEYTEST_STEP2(i)\\\n"
2218 	"	if(key[i] == 1.0)\\\n"
2219 	"	{\\\n"
2220 	"		bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"
2221 	"		if(cc[i] < cd[i] || any(test))key[i] = 0.0; \\\n"
2222 	"	}else if(key[i] == -1.0)\\\n"
2223 	"	{\\\n"
2224 	"		bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"
2225 	"		if(cc[i] > cd[i] || any(test))key[i] = 0.0; \\\n"
2226 	"	}\n"
2227 	"	REPEAT4(KEYTEST_STEP2);\n"
2228 	"	float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"
2229 	"	//assume there is only one keypoint in the four. \n"
2230 	"	if(keysum==1.0) {\n";
2231 
2232 	//////////////////////////////////////////////////////////////////////
2233 	if(GlobalUtil::_SubpixelLocalization)
2234 
2235 	out <<
2236 	"	vec3 offset = vec3(0.0, 0.0, 0.0); \n"
2237 	"	#define TESTMOVE_KEYPOINT(idx) \\\n"
2238 	"	if(key[idx] != 0.0) \\\n"
2239 	"	{\\\n"
2240 	"		cu[0] = cu[idx];	cd[0] = cd[idx];	cc[0] = cc[idx];	\\\n"
2241 	"		v4[0] = v4[idx];	v5[0] = v5[idx];						\\\n"
2242 	"		fxy[0] = fxy[idx];	fxx[0] = fxx[idx];	fyy[0] = fyy[idx];	\\\n"
2243 	"		fx[0] = fx[idx];	fy[0] = fy[idx];	MOVE_EXTRA(idx);  \\\n"
2244 	"	}\n"
2245 	"	TESTMOVE_KEYPOINT(1);\n"
2246 	"	TESTMOVE_KEYPOINT(2);\n"
2247 	"	TESTMOVE_KEYPOINT(3);\n"
2248 	<<
2249 
2250 	"	float fs = 0.5*( cu[0] - cd[0] );				\n"
2251 	"	float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
2252 	"	float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
2253 	"	float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
2254 	"	vec4 A0, A1, A2 ;			\n"
2255 	"	A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]);	\n"
2256 	"	A1 = vec4(fxy[0], fyy[0], fys, -fy[0]);	\n"
2257 	"	A2 = vec4(fxs, fys, fss, -fs);	\n"
2258 	"	vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs));		\n"
2259 	"	float maxa = max(max(x3.x, x3.y), x3.z);	\n"
2260 	"	if(maxa >= 1e-10 ) \n"
2261 	"	{												\n"
2262 	"		if(x3.y ==maxa )							\n"
2263 	"		{											\n"
2264 	"			vec4 TEMP = A1; A1 = A0; A0 = TEMP;	\n"
2265 	"		}else if( x3.z == maxa )					\n"
2266 	"		{											\n"
2267 	"			vec4 TEMP = A2; A2 = A0; A0 = TEMP;	\n"
2268 	"		}											\n"
2269 	"		A0 /= A0.x;									\n"
2270 	"		A1 -= A1.x * A0;							\n"
2271 	"		A2 -= A2.x * A0;							\n"
2272 	"		vec2 x2 = abs(vec2(A1.y, A2.y));		\n"
2273 	"		if( x2.y > x2.x )							\n"
2274 	"		{											\n"
2275 	"			vec3 TEMP = A2.yzw;					\n"
2276 	"			A2.yzw = A1.yzw;						\n"
2277 	"			A1.yzw = TEMP;							\n"
2278 	"			x2.x = x2.y;							\n"
2279 	"		}											\n"
2280 	"		if(x2.x >= 1e-10) {								\n"
2281 	"			A1.yzw /= A1.y;								\n"
2282 	"			A2.yzw -= A2.y * A1.yzw;					\n"
2283 	"			if(abs(A2.z) >= 1e-10) {\n"
2284 	"				offset.z = A2.w /A2.z;				    \n"
2285 	"				offset.y = A1.w - offset.z*A1.z;			    \n"
2286 	"				offset.x = A0.w - offset.z*A0.z - offset.y*A0.y;	\n"
2287 	"				bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>float(THRESHOLD1)) ;\n"
2288 	"				if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"
2289 	"			}\n"
2290 	"		}\n"
2291 	"	}\n"
2292 	<<"\n"
2293 	"	float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2294 	"	gl_FragColor = vec4(keyv,  offset);\n"
2295 	"	}}}}\n"
2296 	"}\n"	<<'\0';
2297 
2298 	else out << "\n"
2299 	"	float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2300 	"	gl_FragColor =  vec4(keyv, 0.0, 0.0, 0.0);\n"
2301 	"	}}}}\n"
2302 	"}\n"	<<'\0';
2303 
2304 	ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2305 	s_keypoint = program ;
2306 
2307 	//parameter
2308 	_param_dog_texu = glGetUniformLocation(*program, "texU");
2309 	_param_dog_texd = glGetUniformLocation(*program, "texD");
2310 	if(GlobalUtil::_DarknessAdaption) 	_param_dog_texi = glGetUniformLocation(*program, "texI");
2311 }
SetDogTexParam(int texU,int texD)2312 void ShaderBagPKSL::SetDogTexParam(int texU, int texD)
2313 {
2314 	glUniform1i(_param_dog_texu, 1);
2315 	glUniform1i(_param_dog_texd, 2);
2316 	if(GlobalUtil::_DarknessAdaption)glUniform1i(_param_dog_texi, 3);
2317 }
SetGenListStepParam(int tex,int tex0)2318 void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0)
2319 {
2320 	glUniform1i(_param_genlist_step_tex0, 1);
2321 }
2322 
SetGenVBOParam(float width,float fwidth,float size)2323 void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size)
2324 {
2325 	float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
2326 	glUniform4fv(_param_genvbo_size, 1, sizes);
2327 }
SetGradPassParam(int texP)2328 void ShaderBagPKSL::SetGradPassParam(int texP)
2329 {
2330 	glUniform1i(_param_grad_pass_texp, 1);
2331 }
2332 
LoadDescriptorShader()2333 void ShaderBagPKSL::LoadDescriptorShader()
2334 {
2335 	GlobalUtil::_DescriptorPPT = 16;
2336 	LoadDescriptorShaderF2();
2337     s_rect_description = LoadDescriptorProgramRECT();
2338 }
2339 
LoadDescriptorProgramRECT()2340 ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramRECT()
2341 {
2342 	//one shader outpout 128/8 = 16 , each fragout encodes 4
2343 	//const double twopi = 2.0*3.14159265358979323846;
2344 	//const double rpi  = 8.0/twopi;
2345 	ostringstream out;
2346 	out<<setprecision(8);
2347 	if(GlobalUtil::_KeepShaderLoop)
2348 	{
2349 		out << 	"#define REPEAT4(FUNCTION)\\\n"
2350 				"for(int i = 0; i < 4; ++i)\\\n"
2351 				"{\\\n"
2352 				"	FUNCTION(i);\\\n"
2353 				"}\n";
2354 	}else
2355 	{
2356 		//loop unroll for ATI
2357 		out <<  "#define REPEAT4(FUNCTION)\\\n"
2358 				"FUNCTION(0);\\\n"
2359 				"FUNCTION(1);\\\n"
2360 				"FUNCTION(2);\\\n"
2361 				"FUNCTION(3);\n";
2362 	}
2363 
2364 	out<<"\n"
2365 	"#define M_PI 3.14159265358979323846\n"
2366 	"#define TWO_PI (2.0*M_PI)\n"
2367 	"#define RPI 1.2732395447351626861510701069801\n"
2368 	"#define WF size.z\n"
2369 	"uniform sampler2DRect tex;			\n"
2370 	"uniform sampler2DRect gtex;			\n"
2371 	"uniform sampler2DRect otex;			\n"
2372 	"uniform vec4		dsize;				\n"
2373 	"uniform vec3		size;				\n"
2374 	"void main()			\n"
2375 	"{\n"
2376 	"	vec2 dim	= size.xy;	//image size			\n"
2377 	"	float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2378 	"	float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));		\n"
2379 	"	index = floor(index*0.125)+ 0.49;  \n"
2380 	"	vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2381 	"	vec2 pos = texture2DRect(tex, coord).xy;		\n"
2382 	"	vec2 wsz = texture2DRect(tex, coord).zw;\n"
2383     "   float aspect_ratio = wsz.y / wsz.x;\n"
2384     "   float aspect_sq = aspect_ratio * aspect_ratio; \n"
2385 	"	vec2 spt  = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";
2386 
2387 	//here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2388 	//and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
2389 	//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2390 	//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2391 	out<<
2392 	"	vec4 temp; vec2 pt;				\n"
2393     "	pt.x = pos.x + fract(idx*0.25) * wsz.x;				\n"
2394 	"	pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y;			\n";
2395 
2396 	//get a horizontal bounding box of the rotated rectangle
2397 	out<<
2398     "	vec4 sz;					\n"
2399 	"	sz.xy = max(pt - spt, vec2(2,2));\n"
2400 	"	sz.zw = min(pt + spt, dim - vec2(3));		\n"
2401 	"	sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2402 	//get voting for two box
2403 
2404 	out<<"\n"
2405 	"	vec4 DA, DB;   vec2 spos;			\n"
2406 	"	DA = DB  = vec4(0.0, 0.0, 0.0, 0.0);		\n"
2407 	"	vec4 nox = vec4(0.0, 1.0, 0.0, 1.0);					\n"
2408 	"	vec4 noy = vec4(0.0, 0.0, 1.0, 1.0);					\n"
2409 	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
2410 	"	{																\n"
2411 	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
2412 	"		{															\n"
2413 	"			vec2 tpt = spos * 2.0 - pt - 0.5;					\n"
2414     "			vec4 nx = (tpt.x + nox) * ispt.x;								\n"
2415     "			vec4 ny = (tpt.y + noy) * ispt.y;			\n"
2416 	"			vec4 nxn = abs(nx), nyn = abs(ny);						\n"
2417     "			bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));	\n"
2418 	"			if(any(inside))\n"
2419 	"			{\n"
2420 	"				vec4 gg = texture2DRect(gtex, spos);\n"
2421 	"				vec4 oo = texture2DRect(otex, spos);\n"
2422     //"               vec4 cc = cos(oo), ss = sin(oo); \n"
2423     //"               oo = atan(ss* aspect_ratio, cc); \n"
2424     //"               gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "
2425 	"				vec4 theta0 = (- oo)*RPI;\n"
2426 	"				vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);			\n"
2427 	"				vec4 theta1 = floor(theta);								\n"
2428 	"				vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg; \n"
2429 	"				vec4 weight2 = (theta - theta1) * weight;				\n"
2430 	"				vec4 weight1 = weight - weight2;						\n"
2431 	"				#define ADD_DESCRIPTOR(i) \\\n"
2432 	"				if(inside[i])\\\n"
2433 	"				{\\\n"
2434 	"					DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
2435 	"					DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
2436 	"					DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
2437 	"					DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
2438 	"				}\n"
2439 	"				REPEAT4(ADD_DESCRIPTOR);\n"
2440 	"			}\n"
2441 	"		}\n"
2442 	"	}\n";
2443 	out<<
2444 	"	 gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2445 	"}\n"<<'\0';
2446 
2447 	ProgramGLSL * program =  new ProgramGLSL(out.str().c_str());
2448 	if(program->IsNative())
2449 	{
2450 		return program;
2451 	}
2452 	else
2453 	{
2454 		delete program;
2455 		return NULL;
2456 	}
2457 }
2458 
LoadDescriptorProgramPKSL()2459 ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramPKSL()
2460 {
2461 	//one shader outpout 128/8 = 16 , each fragout encodes 4
2462 	//const double twopi = 2.0*3.14159265358979323846;
2463 	//const double rpi  = 8.0/twopi;
2464 	ostringstream out;
2465 	out<<setprecision(8);
2466 
2467 	if(GlobalUtil::_KeepShaderLoop)
2468 	{
2469 		out << 	"#define REPEAT4(FUNCTION)\\\n"
2470 				"for(int i = 0; i < 4; ++i)\\\n"
2471 				"{\\\n"
2472 				"	FUNCTION(i);\\\n"
2473 				"}\n";
2474 	}else
2475 	{
2476 		//loop unroll for ATI
2477 		out <<  "#define REPEAT4(FUNCTION)\\\n"
2478 				"FUNCTION(0);\\\n"
2479 				"FUNCTION(1);\\\n"
2480 				"FUNCTION(2);\\\n"
2481 				"FUNCTION(3);\n";
2482 	}
2483 
2484 	out<<"\n"
2485 	"#define M_PI 3.14159265358979323846\n"
2486 	"#define TWO_PI (2.0*M_PI)\n"
2487 	"#define RPI 1.2732395447351626861510701069801\n"
2488 	"#define WF size.z\n"
2489 	"uniform sampler2DRect tex;			\n"
2490 	"uniform sampler2DRect gtex;			\n"
2491 	"uniform sampler2DRect otex;			\n"
2492 	"uniform vec4		dsize;				\n"
2493 	"uniform vec3		size;				\n"
2494 	"void main()			\n"
2495 	"{\n"
2496 	"	vec2 dim	= size.xy;	//image size			\n"
2497 	"	float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2498 	"	float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));		\n"
2499 	"	index = floor(index*0.125)+ 0.49;  \n"
2500 	"	vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2501 	"	vec2 pos = texture2DRect(tex, coord).xy;		\n"
2502 	"	if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0))) "
2503 	"	//discard;	\n"
2504 	"	{ gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
2505 	"	float anglef = texture2DRect(tex, coord).z;\n"
2506 	"	if(anglef > M_PI) anglef -= TWO_PI;\n"
2507 	"	float sigma = texture2DRect(tex, coord).w; \n"
2508 	"	float spt  = abs(sigma * WF);	//default to be 3*sigma	\n";
2509 	//rotation
2510 	out<<
2511 	"	vec4 cscs, rots;						\n"
2512 	"	cscs.x = cos(anglef); cscs.y = sin(anglef);	\n"
2513 	"	cscs.zw = - cscs.xy;							\n"
2514 	"	rots = cscs /spt;								\n"
2515 	"	cscs *= spt; \n";
2516 
2517 	//here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2518 	//and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
2519 	//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2520 	//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2521 	out<<
2522 	"	vec4 temp; vec2 pt, offsetpt;				\n"
2523 	"	/*the fraction part of idx is .5*/			\n"
2524 	"	offsetpt.x = 4.0* fract(idx*0.25) - 2.0;				\n"
2525 	"	offsetpt.y = floor(idx*0.25) - 1.5;			\n"
2526 	"	temp = cscs.xwyx*offsetpt.xyxy;				\n"
2527 	"	pt = pos + temp.xz + temp.yw;				\n";
2528 
2529 	//get a horizontal bounding box of the rotated rectangle
2530 	out<<
2531 	"	vec2 bwin = abs(cscs.xy);					\n"
2532 	"	float bsz = bwin.x + bwin.y;					\n"
2533 	"	vec4 sz;					\n"
2534 	"	sz.xy = max(pt - vec2(bsz), vec2(2,2));\n"
2535 	"	sz.zw = min(pt + vec2(bsz), dim - vec2(3));		\n"
2536 	"	sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2537 	//get voting for two box
2538 
2539 	out<<"\n"
2540 	"	vec4 DA, DB;   vec2 spos;			\n"
2541 	"	DA = DB  = vec4(0.0, 0.0, 0.0, 0.0);		\n"
2542 	"	vec4 nox = vec4(0.0, rots.xy, rots.x + rots.y);					\n"
2543 	"	vec4 noy = vec4(0.0, rots.wx, rots.w + rots.x);					\n"
2544 	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
2545 	"	{																\n"
2546 	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
2547 	"		{															\n"
2548 	"			vec2 tpt = spos * 2.0 - pt - 0.5;					\n"
2549 	"			vec4 temp = rots.xywx * tpt.xyxy;						\n"
2550 	"			vec2 temp2 = temp.xz + temp.yw;						\n"
2551 	"			vec4 nx = temp2.x + nox;								\n"
2552 	"			vec4 ny = temp2.y + noy;			\n"
2553 	"			vec4 nxn = abs(nx), nyn = abs(ny);						\n"
2554 	"			bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));	\n"
2555 	"			if(any(inside))\n"
2556 	"			{\n"
2557 	"				vec4 gg = texture2DRect(gtex, spos);\n"
2558 	"				vec4 oo = texture2DRect(otex, spos);\n"
2559 	"				vec4 theta0 = (anglef - oo)*RPI;\n"
2560 	"				vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);			\n"
2561 	"				vec4 theta1 = floor(theta);								\n"
2562 	"				vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y;	\n"
2563 	"				vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy ));	\n"
2564 	"				vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg * ww; \n"
2565 	"				vec4 weight2 = (theta - theta1) * weight;				\n"
2566 	"				vec4 weight1 = weight - weight2;						\n"
2567 	"	#define ADD_DESCRIPTOR(i) \\\n"
2568 	"				if(inside[i])\\\n"
2569 	"				{\\\n"
2570 	"					DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
2571 	"					DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
2572 	"					DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
2573 	"					DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
2574 	"				}\n"
2575 	"				REPEAT4(ADD_DESCRIPTOR);\n"
2576 	"			}\n"
2577 	"		}\n"
2578 	"	}\n";
2579 	out<<
2580 	"	 gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2581 	"}\n"<<'\0';
2582 
2583 	ProgramGLSL * program =  new ProgramGLSL(out.str().c_str());
2584 	if(program->IsNative())
2585 	{
2586 		return program;
2587 	}
2588 	else
2589 	{
2590 		delete program;
2591 		return NULL;
2592 	}
2593 }
2594 
LoadDescriptorShaderF2()2595 void ShaderBagPKSL::LoadDescriptorShaderF2()
2596 {
2597 
2598 	ProgramGLSL * program = LoadDescriptorProgramPKSL();
2599 	if( program )
2600 	{
2601 		s_descriptor_fp = program;
2602 		_param_descriptor_gtex = glGetUniformLocation(*program, "gtex");
2603 		_param_descriptor_otex = glGetUniformLocation(*program, "otex");
2604 		_param_descriptor_size = glGetUniformLocation(*program, "size");
2605 		_param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
2606 	}
2607 }
2608 
2609 
2610 
SetSimpleOrientationInput(int oTex,float sigma,float sigma_step)2611 void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
2612 {
2613 	glUniform1i(_param_orientation_gtex, 1);
2614 	glUniform2f(_param_orientation_size, sigma, sigma_step);
2615 }
2616 
2617 
SetFeatureOrientationParam(int gtex,int width,int height,float sigma,int otex,float step)2618 void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
2619 {
2620 	///
2621 	glUniform1i(_param_orientation_gtex, 1);
2622 	glUniform1i(_param_orientation_otex, 2);
2623 
2624 	float size[4];
2625 	size[0] = (float)width;
2626 	size[1] = (float)height;
2627 	size[2] = sigma;
2628 	size[3] = step;
2629 	glUniform4fv(_param_orientation_size, 1, size);
2630 }
2631 
SetFeatureDescirptorParam(int gtex,int otex,float dwidth,float fwidth,float width,float height,float sigma)2632 void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
2633 {
2634     if(sigma == 0 && s_rect_description)
2635     {
2636         //rectangle description mode
2637         s_rect_description->UseProgram();
2638         GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");
2639 		GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");
2640 		GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");
2641 		GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");
2642 	    ///
2643 	    glUniform1i(param_descriptor_gtex, 1);
2644 	    glUniform1i(param_descriptor_otex, 2);
2645 
2646 	    float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2647 	    glUniform4fv(param_descriptor_dsize, 1, dsize);
2648 	    float size[3];
2649 	    size[0] = width;
2650 	    size[1] = height;
2651 	    size[2] = GlobalUtil::_DescriptorWindowFactor;
2652 	    glUniform3fv(param_descriptor_size, 1, size);
2653     }else
2654     {
2655 	    ///
2656 	    glUniform1i(_param_descriptor_gtex, 1);
2657 	    glUniform1i(_param_descriptor_otex, 2);
2658 
2659 
2660 	    float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2661 	    glUniform4fv(_param_descriptor_dsize, 1, dsize);
2662 	    float size[3];
2663 	    size[0] = width;
2664 	    size[1] = height;
2665 	    size[2] = GlobalUtil::_DescriptorWindowFactor;
2666 	    glUniform3fv(_param_descriptor_size, 1, size);
2667     }
2668 
2669 }
2670 
2671 
SetGenListEndParam(int ktex)2672 void ShaderBagPKSL::SetGenListEndParam(int ktex)
2673 {
2674 	glUniform1i(_param_genlist_end_ktex, 1);
2675 }
SetGenListInitParam(int w,int h)2676 void ShaderBagPKSL::SetGenListInitParam(int w, int h)
2677 {
2678 	float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f,  (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
2679 	glUniform4fv(_param_genlist_init_bbox, 1, bbox);
2680 }
2681 
SetMarginCopyParam(int xmax,int ymax)2682 void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax)
2683 {
2684 	float truncate[4];
2685 	truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1)  >> 1) - 0.5f;
2686 	truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1)  >> 1) - 0.5f;
2687 	truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
2688 	truncate[3] = truncate[2] +  (((ymax % 2) == 1)? 0.0f : 2.0f);
2689 	glUniform4fv(_param_margin_copy_truncate, 1,  truncate);
2690 }
2691