1 ////////////////////////////////////////////////////////////////////////////
2 // File: ProgramGLSL.cpp
3 // Author: Changchang Wu
4 // Description : GLSL related classes
5 // class ProgramGLSL A simple wrapper of GLSL programs
6 // class ShaderBagGLSL GLSL shaders for SIFT
7 // class FilterGLSL GLSL gaussian filters for SIFT
8 //
9 // Copyright (c) 2007 University of North Carolina at Chapel Hill
10 // All Rights Reserved
11 //
12 // Permission to use, copy, modify and distribute this software and its
13 // documentation for educational, research and non-profit purposes, without
14 // fee, and without a written agreement is hereby granted, provided that the
15 // above copyright notice and the following paragraph appear in all copies.
16 //
17 // The University of North Carolina at Chapel Hill make no representations
18 // about the suitability of this software for any purpose. It is provided
19 // 'as is' without express or implied warranty.
20 //
21 // Please send BUG REPORTS to ccwu@cs.unc.edu
22 //
23 ////////////////////////////////////////////////////////////////////////////
24
25
26 #include "GL/glew.h"
27 #include <string.h>
28 #include <stdio.h>
29 #include <iomanip>
30 #include <iostream>
31 #include <sstream>
32 #include <vector>
33 #include <algorithm>
34 #include <math.h>
35 using namespace std;
36
37 #include "GlobalUtil.h"
38 #include "ProgramGLSL.h"
39 #include "GLTexImage.h"
40 #include "ShaderMan.h"
41 #include "SiftGPU.h"
42
ShaderObject(int shadertype,const char * source,int filesource)43 ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource)
44 {
45
46
47 _type = shadertype;
48 _compiled = 0;
49
50
51 _shaderID = glCreateShader(shadertype);
52 if(_shaderID == 0) return;
53
54 if(source)
55 {
56
57 GLint code_length;
58 if(filesource ==0)
59 {
60 const char* code = source;
61 code_length = (GLint) strlen(code);
62 glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
63 }else
64 {
65 char * code;
66 if((code_length= ReadShaderFile(source, code)) ==0) return;
67 glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
68 delete code;
69 }
70
71 glCompileShader(_shaderID);
72
73 CheckCompileLog();
74
75 if(!_compiled) std::cout << source;
76 }
77
78
79
80
81 }
82
ReadShaderFile(const char * sourcefile,char * & code)83 int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile, char*& code )
84 {
85 code = NULL;
86 FILE * file;
87 int len=0;
88
89 if(sourcefile == NULL) return 0;
90
91 file = fopen(sourcefile,"rt");
92 if(file == NULL) return 0;
93
94
95 fseek(file, 0, SEEK_END);
96 len = ftell(file);
97 rewind(file);
98 if(len >1)
99 {
100 code = new char[len+1];
101 fread(code, sizeof( char), len, file);
102 code[len] = 0;
103 }else
104 {
105 len = 0;
106 }
107
108 fclose(file);
109
110 return len;
111
112 }
113
CheckCompileLog()114 void ProgramGLSL::ShaderObject::CheckCompileLog()
115 {
116
117 GLint status;
118 glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status);
119 _compiled = (status ==GL_TRUE);
120
121 if(_compiled == 0) PrintCompileLog(std::cout);
122
123
124 }
125
~ShaderObject()126 ProgramGLSL::ShaderObject::~ShaderObject()
127 {
128 if(_shaderID) glDeleteShader(_shaderID);
129
130 }
131
IsValidFragmentShader()132 int ProgramGLSL::ShaderObject::IsValidFragmentShader()
133 {
134 return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;
135 }
136
IsValidVertexShader()137 int ProgramGLSL::ShaderObject::IsValidVertexShader()
138 {
139 return _type == GL_VERTEX_SHADER && _shaderID && _compiled;
140 }
141
142
PrintCompileLog(ostream & os)143 void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)
144 {
145 GLint len = 0;
146
147 glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len);
148 if(len <=1) return;
149
150 char * compileLog = new char[len+1];
151 if(compileLog == NULL) return;
152
153 glGetShaderInfoLog(_shaderID, len, &len, compileLog);
154
155
156 os<<"Compile Log\n"<<compileLog<<"\n";
157
158 delete[] compileLog;
159 }
160
161
ProgramGLSL()162 ProgramGLSL::ProgramGLSL()
163 {
164 _linked = 0;
165 _TextureParam0 = -1;
166 _programID = glCreateProgram();
167 }
~ProgramGLSL()168 ProgramGLSL::~ProgramGLSL()
169 {
170 if(_programID)glDeleteProgram(_programID);
171 }
AttachShaderObject(ShaderObject & shader)172 void ProgramGLSL::AttachShaderObject(ShaderObject &shader)
173 {
174 if(_programID && shader.IsValidShaderObject())
175 glAttachShader(_programID, shader.GetShaderID());
176 }
DetachShaderObject(ShaderObject & shader)177 void ProgramGLSL::DetachShaderObject(ShaderObject &shader)
178 {
179 if(_programID && shader.IsValidShaderObject())
180 glDetachShader(_programID, shader.GetShaderID());
181 }
LinkProgram()182 int ProgramGLSL::LinkProgram()
183 {
184 _linked = 0;
185
186 if(_programID==0) return 0;
187
188 glLinkProgram(_programID);
189
190 CheckLinkLog();
191
192 // GlobalUtil::StartTimer("100 link test");
193 // for(int i = 0; i<100; i++) glLinkProgram(_programID);
194 // GlobalUtil::StopTimer();
195
196 return _linked;
197 }
198
CheckLinkLog()199 void ProgramGLSL::CheckLinkLog()
200 {
201 GLint status;
202 glGetProgramiv(_programID, GL_LINK_STATUS, &status);
203
204 _linked = (status == GL_TRUE);
205
206 }
207
208
ValidateProgram()209 int ProgramGLSL::ValidateProgram()
210 {
211 if(_programID && _linked)
212 {
213 /// GLint status;
214 // glValidateProgram(_programID);
215 // glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);
216 // return status == GL_TRUE;
217 return 1;
218 }
219 else
220 return 0;
221 }
222
PrintLinkLog(std::ostream & os)223 void ProgramGLSL::PrintLinkLog(std::ostream &os)
224 {
225 GLint len = 0;
226
227 glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len);
228 if(len <=1) return;
229
230 char* linkLog = new char[len+1];
231 if(linkLog == NULL) return;
232
233 glGetProgramInfoLog(_programID, len, &len, linkLog);
234
235 linkLog[len] = 0;
236
237 if(strstr(linkLog, "failed"))
238 {
239 os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n";
240 _linked = 0;
241 }
242
243 delete[] linkLog;
244 }
245
UseProgram()246 int ProgramGLSL::UseProgram()
247 {
248 if(ValidateProgram())
249 {
250 glUseProgram(_programID);
251 if (_TextureParam0 >= 0) glUniform1i(_TextureParam0, 0);
252 return true;
253 }
254 else
255 {
256 return false;
257 }
258 }
259
260
ProgramGLSL(const char * frag_source)261 ProgramGLSL::ProgramGLSL(const char *frag_source)
262 {
263 _linked = 0;
264 _programID = glCreateProgram();
265 _TextureParam0 = -1;
266 ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
267
268 if(shader.IsValidFragmentShader())
269 {
270 AttachShaderObject(shader);
271 LinkProgram();
272
273 if(!_linked)
274 {
275 //shader.PrintCompileLog(std::cout);
276 PrintLinkLog(std::cout);
277 } else
278 {
279 _TextureParam0 = glGetUniformLocation(_programID, "tex");
280 }
281 }else
282 {
283 _linked = 0;
284 }
285
286 }
287
288 /*
289 ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)
290 {
291 _used = 0;
292 _linked = 0;
293 _programID = glCreateProgram();
294 ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
295 ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);
296 AttachShaderObject(shader);
297 AttachShaderObject(vertex_shader);
298 LinkProgram();
299 if(!_linked)
300 {
301 shader.PrintCompileLog(std::cout);
302 vertex_shader.PrintCompileLog(std::cout);
303 PrintLinkLog(std::cout);
304 std::cout<<vert_source;
305 std::cout<<frag_source;
306 }
307
308 }
309 */
310
311
312
ReLink()313 void ProgramGLSL::ReLink()
314 {
315 glLinkProgram(_programID);
316 }
317
IsNative()318 int ProgramGLSL::IsNative()
319 {
320 return _linked;
321 }
322
FilterGLSL(float sigma)323 FilterGLSL::FilterGLSL(float sigma)
324 {
325 //pixel inside 3*sigma box
326 int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma -0.5) ) ;//
327 int width = 2*sz + 1;
328
329 //filter size truncation
330 if(GlobalUtil::_MaxFilterWidth >0 && width > GlobalUtil::_MaxFilterWidth)
331 {
332 std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl;
333 sz = GlobalUtil::_MaxFilterWidth>>1;
334 width = 2 * sz + 1;
335 }
336
337 int i;
338 float * kernel = new float[width];
339 float rv = 1.0f/(sigma*sigma);
340 float v, ksum =0;
341
342 // pre-compute filter
343 for( i = -sz ; i <= sz ; ++i)
344 {
345 kernel[i+sz] = v = exp(-0.5f * i * i *rv) ;
346 ksum += v;
347 }
348
349 //normalize the kernel
350 rv = 1.0f / ksum;
351 for(i = 0; i< width ;i++) kernel[i]*=rv;
352 //
353
354 MakeFilterProgram(kernel, width);
355
356 _size = sz;
357
358 delete[] kernel;
359 if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl;
360 }
361
362
MakeFilterProgram(float kernel[],int width)363 void FilterGLSL::MakeFilterProgram(float kernel[], int width)
364 {
365 if(GlobalUtil::_usePackedTex)
366 {
367 s_shader_h = CreateFilterHPK(kernel, width);
368 s_shader_v = CreateFilterVPK(kernel, width);
369 }else
370 {
371 s_shader_h = CreateFilterH(kernel, width);
372 s_shader_v = CreateFilterV(kernel, width);
373 }
374 }
375
CreateFilterH(float kernel[],int width)376 ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width)
377 {
378 ostringstream out;
379 out<<setprecision(8);
380
381 out<< "uniform sampler2DRect tex;";
382 out<< "\nvoid main(void){ float intensity = 0.0 ; vec2 pos;\n";
383
384 int half_width = width / 2;
385 for(int i = 0; i< width; i++)
386 {
387 if(i == half_width)
388 {
389
390 out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";
391 out<<"intensity+= or * "<<kernel[i]<<";\n";
392 }else
393 {
394 out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i - half_width) <<") , 0);\n";
395 out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n";
396 }
397 }
398
399 //copy original data to red channel
400 out<<"gl_FragColor.r = or;\n";
401 out<<"gl_FragColor.b = intensity;}\n"<<'\0';
402
403 return new ProgramGLSL(out.str().c_str());
404 }
405
406
CreateFilterV(float kernel[],int height)407 ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height)
408 {
409 ostringstream out;
410 out<<setprecision(8);
411
412 out<< "uniform sampler2DRect tex;";
413 out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";
414 int half_height = height / 2;
415 for(int i = 0; i< height; i++)
416 {
417
418 if(i == half_height)
419 {
420 out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";
421 out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
422
423 }else
424 {
425 out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i - half_height) <<") );\n";
426 out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n";
427 }
428
429 }
430
431 out<<"gl_FragColor.b = orb.y;\n";
432 out<<"gl_FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
433 out<<"gl_FragColor.r = intensity;}\n"<<'\0';
434
435 // std::cout<<buffer<<endl;
436 return new ProgramGLSL(out.str().c_str());
437 }
438
439
440
CreateFilterHPK(float kernel[],int width)441 ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width)
442 {
443 //both h and v are packed...
444 int i, j , xw, xwn;
445
446 int halfwidth = width >>1;
447 float * pf = kernel + halfwidth;
448 int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up
449 int npixel = (nhpixel<<1)+1;//
450 float weight[3];
451 ostringstream out;;
452 out<<setprecision(8);
453
454 out<< "uniform sampler2DRect tex;";
455 out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
456 ///use multi texture coordinate because nhpixels can be at most 3
457 out<<"vec4 pc; vec2 coord; \n";
458 for( i = 0 ; i < npixel ; i++)
459 {
460 out<<"coord = gl_TexCoord[0].xy + vec2(float("<<i-nhpixel<<"),0);\n";
461 out<<"pc=texture2DRect(tex, coord);\n";
462 if(GlobalUtil::_PreciseBorder) out<<"if(coord.x < 0.0) pc = pc.rrbb;\n";
463 //for each sub-pixel j in center, the weight of sub-pixel k
464 xw = (i - nhpixel)*2;
465 for( j = 0; j < 3; j++)
466 {
467 xwn = xw + j -1;
468 weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
469 }
470 if(weight[1] == 0.0)
471 {
472 out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
473 }
474 else
475 {
476 out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
477 out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
478 }
479
480 }
481 out<<"gl_FragColor = result;}\n"<<'\0';
482
483 return new ProgramGLSL(out.str().c_str());
484
485
486 }
487
488
CreateFilterVPK(float kernel[],int height)489 ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height)
490 {
491
492 //both h and v are packed...
493 int i, j, yw, ywn;
494
495 int halfh = height >>1;
496 float * pf = kernel + halfh;
497 int nhpixel = (halfh+1)>>1; //how many neighbour pixels need to be looked up
498 int npixel = (nhpixel<<1)+1;//
499 float weight[3];
500 ostringstream out;;
501 out<<setprecision(8);
502
503 out<< "uniform sampler2DRect tex;";
504 out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
505 ///use multi texture coordinate because nhpixels can be at most 3
506 out<<"vec4 pc; vec2 coord;\n";
507 for( i = 0 ; i < npixel ; i++)
508 {
509 out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<i-nhpixel<<"));\n";
510 out<<"pc=texture2DRect(tex, coord);\n";
511 if(GlobalUtil::_PreciseBorder) out<<"if(coord.y < 0.0) pc = pc.rgrg;\n";
512
513 //for each sub-pixel j in center, the weight of sub-pixel k
514 yw = (i - nhpixel)*2;
515 for( j = 0; j < 3; j++)
516 {
517 ywn = yw + j -1;
518 weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
519 }
520 if(weight[1] == 0.0)
521 {
522 out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
523 }else
524 {
525 out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
526 out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
527 }
528 }
529 out<<"gl_FragColor = result;}\n"<<'\0';
530
531 return new ProgramGLSL(out.str().c_str());
532 }
533
534
535
ShaderBag()536 ShaderBag::ShaderBag()
537 {
538 s_debug = 0;
539 s_orientation = 0;
540 s_display_gaussian = 0;
541 s_display_dog = 0;
542 s_display_grad = 0;
543 s_display_keys = 0;
544 s_sampling = 0;
545 s_grad_pass = 0;
546 s_dog_pass = 0;
547 s_keypoint = 0;
548 s_genlist_init_tight = 0;
549 s_genlist_init_ex = 0;
550 s_genlist_histo = 0;
551 s_genlist_start = 0;
552 s_genlist_step = 0;
553 s_genlist_end = 0;
554 s_vertex_list = 0;
555 s_descriptor_fp = 0;
556 s_margin_copy = 0;
557 ////////////
558 f_gaussian_skip0 = NULL;
559 f_gaussian_skip1 = NULL;
560 f_gaussian_step = NULL;
561 _gaussian_step_num = 0;
562
563 }
564
~ShaderBag()565 ShaderBag::~ShaderBag()
566 {
567 if(s_debug)delete s_debug;
568 if(s_orientation)delete s_orientation;
569 if(s_display_gaussian)delete s_display_gaussian;
570 if(s_display_dog)delete s_display_dog;
571 if(s_display_grad)delete s_display_grad;
572 if(s_display_keys)delete s_display_keys;
573 if(s_sampling)delete s_sampling;
574 if(s_grad_pass)delete s_grad_pass;
575 if(s_dog_pass) delete s_dog_pass;
576 if(s_keypoint)delete s_keypoint;
577 if(s_genlist_init_tight)delete s_genlist_init_tight;
578 if(s_genlist_init_ex)delete s_genlist_init_ex;
579 if(s_genlist_histo)delete s_genlist_histo;
580 if(s_genlist_start)delete s_genlist_start;
581 if(s_genlist_step)delete s_genlist_step;
582 if(s_genlist_end)delete s_genlist_end;
583 if(s_vertex_list)delete s_vertex_list;
584 if(s_descriptor_fp)delete s_descriptor_fp;
585 if(s_margin_copy) delete s_margin_copy;
586
587 //////////////////////////////////////////////
588 if(f_gaussian_skip1) delete f_gaussian_skip1;
589
590 for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
591 {
592 if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i];
593 }
594 if(f_gaussian_step && _gaussian_step_num > 0)
595 {
596 for(int i = 0; i< _gaussian_step_num; i++)
597 {
598 delete f_gaussian_step[i];
599 }
600 delete[] f_gaussian_step;
601 }
602 }
603
604
SelectInitialSmoothingFilter(int octave_min,SiftParam & param)605 void ShaderBag::SelectInitialSmoothingFilter(int octave_min, SiftParam¶m)
606 {
607 float sigma = param.GetInitialSmoothSigma(octave_min);
608 if(sigma == 0)
609 {
610 f_gaussian_skip0 = NULL;
611 }else
612 {
613 for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
614 {
615 if(f_gaussian_skip0_v[i]->_id == octave_min)
616 {
617 f_gaussian_skip0 = f_gaussian_skip0_v[i];
618 return ;
619 }
620 }
621 FilterGLSL * filter = new FilterGLSL(sigma);
622 filter->_id = octave_min;
623 f_gaussian_skip0_v.push_back(filter);
624 f_gaussian_skip0 = filter;
625 }
626 }
627
CreateGaussianFilters(SiftParam & param)628 void ShaderBag::CreateGaussianFilters(SiftParam¶m)
629 {
630 if(param._sigma_skip0>0.0f)
631 {
632 FilterGLSL * filter;
633 f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);
634 filter->_id = GlobalUtil::_octave_min_default;
635 f_gaussian_skip0_v.push_back(filter);
636 }
637 if(param._sigma_skip1>0.0f)
638 {
639 f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);
640 }
641
642 f_gaussian_step = new FilterProgram*[param._sigma_num];
643 for(int i = 0; i< param._sigma_num; i++)
644 {
645 f_gaussian_step[i] = new FilterGLSL(param._sigma[i]);
646 }
647 _gaussian_step_num = param._sigma_num;
648 }
649
650
LoadDynamicShaders(SiftParam & param)651 void ShaderBag::LoadDynamicShaders(SiftParam& param)
652 {
653 LoadKeypointShader(param._dog_threshold, param._edge_threshold);
654 LoadGenListShader(param._dog_level_num, 0);
655 CreateGaussianFilters(param);
656 }
657
658
LoadFixedShaders()659 void ShaderBagGLSL::LoadFixedShaders()
660 {
661
662
663 s_gray = new ProgramGLSL(
664 "uniform sampler2DRect tex; void main(void){\n"
665 "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex, gl_TexCoord[0].st ).rgb);\n"
666 "gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");
667
668
669 s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg = gl_TexCoord[0].st;}");
670
671
672 s_sampling = new ProgramGLSL(
673 "uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");
674
675 //
676 s_grad_pass = new ProgramGLSL(
677 "uniform sampler2DRect tex; void main ()\n"
678 "{\n"
679 " vec4 v1, v2, gg;\n"
680 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
681 " gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"
682 " gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"
683 " gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"
684 " gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"
685 " vec2 dxdy = (gg.yw - gg.xz); \n"
686 " float grad = 0.5*length(dxdy);\n"
687 " float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
688 " gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
689 "}\n\0");
690
691 ProgramGLSL * program;
692 s_margin_copy = program = new ProgramGLSL(
693 "uniform sampler2DRect tex; uniform vec2 truncate;\n"
694 "void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");
695
696 _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
697
698
699 GlobalUtil::_OrientationPack2 = 0;
700 LoadOrientationShader();
701
702 if(s_orientation == NULL)
703 {
704 //Load a simplified version if the right version is not supported
705 s_orientation = program = new ProgramGLSL(
706 "uniform sampler2DRect tex; uniform sampler2DRect oTex;\n"
707 " uniform float size; void main(){\n"
708 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
709 " vec4 oo = texture2DRect(oTex, cc.rg);\n"
710 " gl_FragColor.rg = cc.rg;\n"
711 " gl_FragColor.b = oo.a;\n"
712 " gl_FragColor.a = size;}");
713
714 _param_orientation_gtex = glGetUniformLocation(*program, "oTex");
715 _param_orientation_size = glGetUniformLocation(*program, "size");
716 GlobalUtil::_MaxOrientation = 0;
717 GlobalUtil::_FullSupported = 0;
718 std::cerr<<"Orientation simplified on this hardware"<<endl;
719 }
720
721 if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
722 if(s_descriptor_fp == NULL)
723 {
724 GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;
725 std::cerr<<"Descriptor ignored on this hardware"<<endl;
726 }
727
728 s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
729 }
730
731
LoadDisplayShaders()732 void ShaderBagGLSL::LoadDisplayShaders()
733 {
734 s_copy_key = new ProgramGLSL(
735 "uniform sampler2DRect tex; void main(){\n"
736 "gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0); }");
737
738
739 ProgramGLSL * program;
740 s_vertex_list = program = new ProgramGLSL(
741 "uniform vec4 sizes; uniform sampler2DRect tex;\n"
742 "void main(void){\n"
743 "float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"
744 "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
745 "float px = mod(index, twidth);\n"
746 "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
747 "vec4 cc = texture2DRect(tex, tpos );\n"
748 "float size = 3.0 * cc.a; //sizes.x;// \n"
749 "gl_FragColor.zw = vec2(0.0, 1.0);\n"
750 "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy; }\n"
751 "else {float type = fract(px);\n"
752 "vec2 dxy = vec2(0); \n"
753 "dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
754 "dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
755 "float s = sin(cc.b); float c = cos(cc.b); \n"
756 "gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
757 "gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");
758
759 _param_genvbo_size = glGetUniformLocation(*program, "sizes");
760
761 s_display_gaussian = new ProgramGLSL(
762 "uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"
763 "gl_FragColor = vec4(r, r, r, 1);}" );
764
765 s_display_dog = new ProgramGLSL(
766 "uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"
767 "gl_FragColor = vec4(g, g, g, 0.0);}" );
768
769 s_display_grad = new ProgramGLSL(
770 "uniform sampler2DRect tex; void main(void){\n"
771 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");
772
773 s_display_keys= new ProgramGLSL(
774 "uniform sampler2DRect tex; void main(void){\n"
775 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
776 " if(cc.r ==0.0) discard; gl_FragColor = (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");
777 }
778
// Generates and loads the keypoint detection fragment program (s_keypoint).
//   threshold      : DoG response threshold. THRESHOLD1 uses it as is;
//                    THRESHOLD0 uses 0.8 * threshold when
//                    GlobalUtil::_SubpixelLocalization is set.
//   edge_threshold : used to derive THRESHOLD2 = (e+1)*(e+1)/e for the
//                    edge-suppression test in the shader.
// The shader text is assembled in `out`. The stream position right after the
// in-level extremum test is remembered in `pos`. If the complete program
// does not report itself as native (IsNative()), the stream is rewound to
// `pos`, a short ending is written there and a simplified program is built
// instead; _SubpixelLocalization is then cleared. Each version of the text is
// terminated with an explicit '\0', and the text is handed over as a C
// string, so anything left in the stream after that terminator is ignored.
// Finally the uniform locations of "texU" and "texD" are cached.
LoadKeypointShader(float threshold,float edge_threshold)779 void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold)
780 {
781 float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
782 float threshold1 = threshold;
783 float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
784 ostringstream out;;
785 streampos pos;
786
787 //tex(X)(Y)
788 //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
789 //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
// With _DarknessAdaption the two DoG thresholds are scaled in the shader by
// min(2.0 * cc.r + 0.1, 1.0); otherwise they are plain constants.
790 if(GlobalUtil::_DarknessAdaption)
791 {
792 out << "#define THRESHOLD0 (" << threshold0 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
793 "#define THRESHOLD1 (" << threshold1 << " * min(2.0 * cc.r + 0.1, 1.0))\n"
794 "#define THRESHOLD2 " << threshold2 << "\n";
795 }else
796 {
797 out << "#define THRESHOLD0 " << threshold0 << "\n"
798 "#define THRESHOLD1 " << threshold1 << "\n"
799 "#define THRESHOLD2 " << threshold2 << "\n";
800 }
801
802 out<<
803 "uniform sampler2DRect tex, texU, texD; void main ()\n"
804 "{\n"
805 " vec4 v1, v2, gg, temp;\n"
806 " vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
807 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
808 " temp = texture2DRect(tex, gl_TexCoord[1].xy);\n"
809 " v1.x = temp.g; gg.x = temp.r;\n"
810 " temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"
811 " v1.y = temp.g; gg.y = temp.r;\n"
812 " temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"
813 " v1.z = temp.g; gg.z = temp.r;\n"
814 " temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"
815 " v1.w = temp.g; gg.w = temp.r;\n"
816 " v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"
817 " v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"
818 " v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"
819 " v2.w = texture2DRect(tex, TexRU.xy).g;\n"
820 " vec2 dxdy = (gg.yw - gg.xz); \n"
821 " float grad = 0.5*length(dxdy);\n"
822 " float theta = grad==0.0? 0.0: atan(dxdy.y, dxdy.x);\n"
823 " gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
824
825 //test against 8 neighbours
826 //use variable to identify type of extremum
827 //1.0 for local maximum and 0.5 for minimum
828 <<
829 " float dog = 0.0; \n"
830 " gl_FragData[1] = vec4(0, 0, 0, 0); \n"
831 " dog = cc.g > float(THRESHOLD0) && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"
832 " dog = cc.g < float(-THRESHOLD0) && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"
833 " if(dog == 0.0) return;\n";
834
// Remember where the remaining tests begin; the fallback at the end of this
// function overwrites the text from this position.
835 pos = out.tellp();
836 //do edge supression first..
837 //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
838 //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
839
840 out<<
841 " float fxx, fyy, fxy; \n"
842 " vec4 D2 = v1.xyzw - cc.gggg;\n"
843 " vec2 D4 = v2.xw - v2.yz;\n"
844 " fxx = D2.x + D2.y;\n"
845 " fyy = D2.z + D2.w;\n"
846 " fxy = 0.25*(D4.x + D4.y);\n"
847 " float fxx_plus_fyy = fxx + fyy;\n"
848 " float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
849 " float score_down = (fxx*fyy - fxy*fxy);\n"
850 " if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)return;\n";
851
852 //...
853 out<<" \n"
854 " vec2 D5 = 0.5*(v1.yw-v1.xz); \n"
855 " float fx = D5.x, fy = D5.y ; \n"
856 " float fs, fss , fxs, fys ; \n"
857 " vec2 v3; vec4 v4, v5, v6;\n"
858 //read 9 pixels of upper level
859 <<
860 " v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"
861 " v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"
862 " v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"
863 " v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"
864 " v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"
865 " v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"
866 " v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"
867 " v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"
868 " v6.w = texture2DRect(texU, TexRU.xy).g;\n"
869 //compare with 9 pixels of upper level
870 //read and compare with 9 pixels of lower level
871 //the maximum case
872 <<
873 " if(dog == 1.0)\n"
874 " {\n"
875 " if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return; \n"
876 " v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
877 " v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
878 " v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
879 " v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
880 " v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
881 " v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
882 " v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
883 " v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
884 " v6.w = texture2DRect(texD, TexRU.xy).g;\n"
885 " if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return; \n"
886 " }\n"
887 //the minimum case
888 <<
889 " else{\n"
890 " if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return; \n"
891 " v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
892 " v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
893 " v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
894 " v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
895 " v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
896 " v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
897 " v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
898 " v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
899 " v6.w = texture2DRect(texD, TexRU.xy).g;\n"
900 " if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return; \n"
901 " }\n";
902
// The statement below is an unbraced if/else: the first branch emits the
// sub-pixel refinement code, the second only stores the extremum type.
903 if(GlobalUtil::_SubpixelLocalization)
904
905 // sub-pixel localization FragData1 = vec4(dog, 0, 0, 0); return;
906 out <<
907 " fs = 0.5*( v3.x - v3.y ); \n"
908 " fss = v3.x + v3.y - cc.g - cc.g;\n"
909 " fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
910 " fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
911
912 //
913 // let dog difference be quatratic function of dx, dy, ds;
914 // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +
915 // + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
916 // + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
917 // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
918
919 //the local extremum satisfies
920 // df/dx = 0, df/dy = 0, df/dz = 0
921
922 //that is
923 // |-fx| | fxx fxy fxs | |dx|
924 // |-fy| = | fxy fyy fys | * |dy|
925 // |-fs| | fxs fys fss | |ds|
926 // need to solve dx, dy, ds
927
928 // Use Gauss elimination to solve the linear system
929 <<
930 " vec3 dxys = vec3(0.0); \n"
931 " vec4 A0, A1, A2 ; \n"
932 " A0 = vec4(fxx, fxy, fxs, -fx); \n"
933 " A1 = vec4(fxy, fyy, fys, -fy); \n"
934 " A2 = vec4(fxs, fys, fss, -fs); \n"
935 " vec3 x3 = abs(vec3(fxx, fxy, fxs)); \n"
936 " float maxa = max(max(x3.x, x3.y), x3.z); \n"
937 " if(maxa >= 1e-10 ) { \n"
938 " if(x3.y ==maxa ) \n"
939 " { \n"
940 " vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
941 " }else if( x3.z == maxa ) \n"
942 " { \n"
943 " vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
944 " } \n"
945 " A0 /= A0.x; \n"
946 " A1 -= A1.x * A0; \n"
947 " A2 -= A2.x * A0; \n"
948 " vec2 x2 = abs(vec2(A1.y, A2.y)); \n"
949 " if( x2.y > x2.x ) \n"
950 " { \n"
951 " vec3 TEMP = A2.yzw; \n"
952 " A2.yzw = A1.yzw; \n"
953 " A1.yzw = TEMP; \n"
954 " x2.x = x2.y; \n"
955 " } \n"
956 " if(x2.x >= 1e-10) { \n"
957 " A1.yzw /= A1.y; \n"
958 " A2.yzw -= A2.y * A1.yzw; \n"
959 " if(abs(A2.z) >= 1e-10) { \n"
960 // compute dx, dy, ds:
961 <<
962 " \n"
963 " dxys.z = A2.w /A2.z; \n"
964 " dxys.y = A1.w - dxys.z*A1.z; \n"
965 " dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y; \n"
966
967 //one more threshold which I forgot in versions prior to 286
968 <<
969 " bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= float(THRESHOLD1)) ;\n"
970 " if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0.0;\n"
971 " }\n"
972 " }\n"
973 " }\n"
974 //keep the point when the offset is less than 1
975 <<
976 " gl_FragData[1] = vec4( dog, dxys); \n";
977 else
978
979 out<<
980 " gl_FragData[1] = vec4( dog, 0.0, 0.0, 0.0) ; \n";
981
982 out<<
983 "}\n" <<'\0';
984
985
986
// Try the complete program first.
987 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
988 if(program->IsNative())
989 {
990 s_keypoint = program ;
991 //parameter
992 }else
993 {
// Fallback: overwrite everything after the in-level extremum test with a
// short ending; the '\0' cuts off the remainder of the earlier text.
994 delete program;
995 out.seekp(pos);
996 out <<
997 " gl_FragData[1] = vec4(dog, 0.0, 0.0, 0.0) ; \n"
998 "}\n" <<'\0';
999 s_keypoint = program = new ProgramGLSL(out.str().c_str());
1000 GlobalUtil::_SubpixelLocalization = 0;
1001 std::cerr<<"Detection simplified on this hardware"<<endl;
1002 }
1003
1004 _param_dog_texu = glGetUniformLocation(*program, "texU");
1005 _param_dog_texd = glGetUniformLocation(*program, "texD");
1006 }
1007
1008
SetDogTexParam(int texU,int texD)1009 void ShaderBagGLSL::SetDogTexParam(int texU, int texD)
1010 {
1011 glUniform1i(_param_dog_texu, 1);
1012 glUniform1i(_param_dog_texd, 2);
1013 }
1014
SetGenListStepParam(int tex,int tex0)1015 void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0)
1016 {
1017 glUniform1i(_param_genlist_step_tex0, 1);
1018 }
SetGenVBOParam(float width,float fwidth,float size)1019 void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth, float size)
1020 {
1021 float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
1022 glUniform4fv(_param_genvbo_size, 1, sizes);
1023
1024 }
1025
1026
1027
UnloadProgram()1028 void ShaderBagGLSL::UnloadProgram()
1029 {
1030 glUseProgram(0);
1031 }
1032
1033
1034
// Creates the fragment programs used to build the feature list and caches the
// uniform locations that the corresponding Set*Param methods upload to.
// Programs created here: s_genlist_init_tight, s_genlist_init_ex,
// s_genlist_histo, s_genlist_start and s_genlist_step.
// Neither ndoglev nor nlev is read in this function.
LoadGenListShader(int ndoglev,int nlev)1035 void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev)
1036 {
1037 ProgramGLSL * program;
1038
// Tight initialization: packs the red channel of four samples (texture
// coordinates 0..3) into one RGBA texel; each output channel is 1.0 when the
// corresponding sample is greater than zero and 0.0 otherwise.
1039 s_genlist_init_tight = new ProgramGLSL(
1040 "uniform sampler2DRect tex; void main (void){\n"
1041 "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1042 "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1043 "gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
1044 "}");
1045
1046
// Bounded initialization: same packing as above, but a sample only counts when
// its texture coordinate is strictly less than the "bbox" uniform in both x and y.
1047 s_genlist_init_ex = program = new ProgramGLSL(
1048 "uniform sampler2DRect tex;uniform vec2 bbox;\n"
1049 "void main (void ){\n"
1050 "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1051 "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1052 "bvec4 helper2 = bvec4( \n"
1053 "all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0.0,\n"
1054 "all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0.0,\n"
1055 "all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0.0,\n"
1056 "all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0.0);\n"
1057 "gl_FragColor = vec4(helper2);\n"
1058 "}");
// location consumed by SetGenListInitParam
1059 _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1060
1061
1062 //reduction ...
// Each output channel is the sum of all four channels of one input sample:
// r <- sample 0, g <- sample 1, b <- sample 2, a <- sample 3.
1063 s_genlist_histo = new ProgramGLSL(
1064 "uniform sampler2DRect tex; void main (void){\n"
1065 "vec4 helper; vec4 helper2; \n"
1066 "helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
1067 "helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
1068 "gl_FragColor.rg = helper2.xz + helper2.yw;\n"
1069 "helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
1070 "helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
1071 "gl_FragColor.ba= helper2.xz+helper2.yw;\n"
1072 "}");
1073
1074
1075 //read of the first part, which generates tex coordinates
// start variant (start = 1, step = 1): exposes the "width" and "tex0" uniforms
1076 s_genlist_start= program = LoadGenListStepShader(1, 1);
1077 _param_ftex_width= glGetUniformLocation(*program, "width");
1078 _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
1079 //stepping
// step variant (start = 0, step = 1): only the "tex0" location is cached here
1080 s_genlist_step = program = LoadGenListStepShader(0, 1);
1081 _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
1082
1083 }
1084
SetMarginCopyParam(int xmax,int ymax)1085 void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax)
1086 {
1087 float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
1088 glUniform2fv(_param_margin_copy_truncate, 1, truncate);
1089 }
1090
SetGenListInitParam(int w,int h)1091 void ShaderBagGLSL::SetGenListInitParam(int w, int h)
1092 {
1093 float bbox[2] = {w - 1.0f, h - 1.0f};
1094 glUniform2fv(_param_genlist_init_bbox, 1, bbox);
1095 }
SetGenListStartParam(float width,int tex0)1096 void ShaderBagGLSL::SetGenListStartParam(float width, int tex0)
1097 {
1098 glUniform1f(_param_ftex_width, width);
1099 glUniform1i(_param_genlist_start_tex0, 0);
1100 }
1101
1102
LoadGenListStepShader(int start,int step)1103 ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step)
1104 {
1105 int i;
1106 // char chanels[5] = "rgba";
1107 ostringstream out;
1108
1109 for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n";
1110 if(start)
1111 {
1112 out<<"uniform float width;\n";
1113 out<<"void main(void){\n";
1114 out<<"float index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";
1115 out<<"vec2 pos = vec2(0.5, 0.5);\n";
1116 }else
1117 {
1118 out<<"uniform sampler2DRect tex;\n";
1119 out<<"void main(void){\n";
1120 out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";
1121 out<<"vec2 pos = tc.rg; float index = tc.b;\n";
1122 }
1123 out<<"vec2 sum; vec4 cc;\n";
1124
1125
1126 if(step>0)
1127 {
1128 out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
1129 for(i = 0; i < step; i++)
1130 {
1131
1132 out<<"cc = texture2DRect(tex"<<i<<", pos);\n";
1133 out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b; \n";
1134 out<<"if (index <cc.r){ opos = cpos.xx;}\n";
1135 out<<"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
1136 out<<"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
1137 out<<"else {opos = cpos.yy; index -= sum.y;}\n";
1138 out<<"pos = (pos + pos + opos);\n";
1139 }
1140 }
1141 out<<"gl_FragColor = vec4(pos, index, 1.0);\n";
1142 out<<"}\n"<<'\0';
1143 return new ProgramGLSL(out.str().c_str());
1144 }
1145
1146
// Generates the GLSL source of the orientation fragment program, compiles it,
// and, when ProgramGLSL::IsNative() reports success, stores it in s_orientation
// and caches the locations of the "gradTex", "size" and "texS" uniforms.
// On failure the program is deleted and s_orientation is left untouched.
//
// Shader inputs visible in the generated code:
//   size.xy : used as the image dimension `dim` that clamps the sampling window
//   size.z  : 0 -> sigma is read from the feature texel (loc.w);
//             non-zero -> sigma = abs(size.z); negative -> early return with angle 0
//   size.w  : base of the pow() applied to the sub-scale offset (texS path only)
// The histogram uses 36 bins of 10 degrees packed into bins[0..8] (vec4 each).
// NOTE(review): ORIENTATION_THRESHOLD is emitted without a float(...) wrapper,
// unlike the two other defines - confirm an integral value cannot break strict
// GLSL compilers.
LoadOrientationShader()1147 void ShaderBagGLSL::LoadOrientationShader()
1148 {
1149 ostringstream out;
1150
// NVIDIA-specific compiler hints
1151 if(GlobalUtil::_IsNvidia)
1152 {
1153 out << "#pragma optionNV(ifcvt none)\n"
1154 "#pragma optionNV(unroll all)\n";
1155 }
1156
// defines, uniforms and the start of main(); "texS" is only declared when
// sub-pixel localization or extremum-sign keeping is enabled
1157 out<<"\n"
1158 "#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
1159 "#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1160 "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1161 "uniform sampler2DRect tex; \n"
1162 "uniform sampler2DRect gradTex; \n"
1163 "uniform vec4 size; \n"
1164 << ((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)? " uniform sampler2DRect texS; \n" : " ") <<
1165 "void main() \n"
1166 "{ \n"
1167 " vec4 bins[10]; \n"
1168 " bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"
1169 " bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"
1170 " bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"
1171 " vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1172 " vec2 pos = loc.xy; \n"
1173 " bool orientation_mode = (size.z != 0.0); \n"
1174 " float sigma = orientation_mode? abs(size.z) : loc.w; \n";
// optional refinement: shift the position by offset.yz, rescale sigma by
// pow(size.w, offset.w) and, when _KeepExtremumSign is set, negate sigma for
// texels whose offset.x is below 0.6
1175 if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
1176 {
1177 out<<
1178 " if(orientation_mode){\n"
1179 " vec4 offset = texture2DRect(texS, pos);\n"
1180 " pos.xy = pos.xy + offset.yz; \n"
1181 " sigma = sigma * pow(size.w, offset.w);\n"
1182 " #if "<< GlobalUtil::_KeepExtremumSign << "\n"
1183 " if(offset.x < 0.6) sigma = -sigma; \n"
1184 " #endif\n"
1185 " }\n";
1186 }
// sampling window: a square of half-width `win` around pos, clamped to
// [1, dim - 2] and snapped to pixel centers
1187 out<<
1188 " //bool fixed_orientation = (size.z < 0.0); \n"
1189 " if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
1190 " float gsigma = sigma * GAUSSIAN_WF; \n"
1191 " vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))) ; \n"
1192 " vec2 dim = size.xy; \n"
1193 " float dist_threshold = win.x*win.x+0.5; \n"
1194 " float factor = -0.5/(gsigma*gsigma); \n"
1195 " vec4 sz; vec2 spos; \n"
1196 " //if(any(pos.xy <= 1)) discard; \n"
1197 " sz.xy = max( pos - win, vec2(1,1)); \n"
1198 " sz.zw = min( pos + win, dim-vec2(2, 2)); \n"
1199 " sz = floor(sz)+0.5;";
1200 //loop to get the histogram
1201
// for every sample inside the circular window: read gradient magnitude (b) and
// angle (a) from gradTex, weight the magnitude with a Gaussian of the squared
// distance and build `inc`, a one-hot vec4 selecting the lane (idx mod 4)
1202 out<<"\n"
1203 " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1204 " { \n"
1205 " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1206 " { \n"
1207 " vec2 offset = spos - pos; \n"
1208 " float sq_dist = dot(offset,offset); \n"
1209 " if( sq_dist < dist_threshold){ \n"
1210 " vec4 cc = texture2DRect(gradTex, spos); \n"
1211 " float grad = cc.b; float theta = cc.a; \n"
1212 " float idx = floor(degrees(theta)*0.1); \n"
1213 " if(idx < 0.0 ) idx += 36.0; \n"
1214 " float weight = grad*exp(sq_dist * factor); \n"
1215 " float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ; \n"
1216 " vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";
1217
// accumulate `inc` into bins[idx / 4]: either by direct array indexing or by
// an explicit comparison tree, depending on _UseDynamicIndexing
1218 if(GlobalUtil::_UseDynamicIndexing)
1219 {
1220 //dynamic indexing may not be faster
1221 out<<"\n"
1222 " int iidx = int((idx*0.25)); \n"
1223 " bins[iidx]+=inc; \n"
1224 " } \n"
1225 " } \n"
1226 " }";
1227
1228 }else
1229 {
1230 //nvfp40 still does not support dynamic array indexing
1231 //unrolled binary search...
1232 out<<"\n"
1233 " if(idx < 16.0) \n"
1234 " { \n"
1235 " if(idx < 8.0) \n"
1236 " { \n"
1237 " if(idx < 4.0) { bins[0]+=inc;} \n"
1238 " else { bins[1]+=inc;} \n"
1239 " }else \n"
1240 " { \n"
1241 " if(idx < 12.0){ bins[2]+=inc;} \n"
1242 " else { bins[3]+=inc;} \n"
1243 " } \n"
1244 " }else if(idx < 32.0) \n"
1245 " { \n"
1246 " if(idx < 24.0) \n"
1247 " { \n"
1248 " if(idx <20.0) { bins[4]+=inc;} \n"
1249 " else { bins[5]+=inc;} \n"
1250 " }else \n"
1251 " { \n"
1252 " if(idx < 28.0){ bins[6]+=inc;} \n"
1253 " else { bins[7]+=inc;} \n"
1254 " } \n"
1255 " }else \n"
1256 " { \n"
1257 " bins[8]+=inc; \n"
1258 " } \n"
1259 " } \n"
1260 " } \n"
1261 " }";
1262
1263 }
1264
// append histogram smoothing, peak search and output code; this also closes
// main() and terminates the stream contents with '\0'
1265 WriteOrientationCodeToStream(out);
1266
1267 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1268 if(program->IsNative())
1269 {
1270 s_orientation = program ;
1271 _param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
1272 _param_orientation_size = glGetUniformLocation(*program, "size");
1273 _param_orientation_stex = glGetUniformLocation(*program, "texS");
1274 }else
1275 {
// compilation/link was not accepted: discard the program
1276 delete program;
1277 }
1278 }
1279
1280
// Appends the second half of the orientation shader's main() to `out`:
//   1. two smoothing passes over the histogram held in bins[0..8]
//      (bins[9] is used as a wrap-around copy of bins[0]),
//   2. a search for the maximum bin value,
//   3. the FINDPEAK macro plus its nine invocations (looped or unrolled),
//   4. the gl_FragData writes, the closing brace of main() and a trailing '\0'.
// The generated text refers to variables that the caller's shader prologue must
// already have declared: bins, pos, sigma and orientation_mode.
// Code shape depends on GlobalUtil::_KeepShaderLoop and GlobalUtil::_MaxOrientation.
WriteOrientationCodeToStream(std::ostream & out)1281 void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)
1282 {
1283 //smooth histogram and find the largest
1284 /*
1285 smoothing kernel: (1 3 6 7 6 3 1 )/27
1286 the same as 3 pass of (1 1 1)/3 averaging
1287 maybe better to use 4 pass on the vectors...
1288 */
1289
1290
1291 //the inner loop on different array numbers is always unrolled in fp40
1292
1293 //bug fixed here:)
// FILTER_CODE(i) replaces bins[i] with bins[i]*m2 plus contributions from the
// unfiltered previous bin (prev.yzw through m1) and from the next bin
// (bins[i+1].zyx through m1); `prev` is then advanced to the old bins[i]
1294 out<<"\n"
1295 " //mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0; \n"
1296 " mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0; \n"
1297 " mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
1298 " #define FILTER_CODE(i) { \\\n"
1299 " vec4 newb = (bins[i]* m2); \\\n"
1300 " newb.xyz += ( prev.yzw * m1); \\\n"
1301 " prev = bins[i]; \\\n"
1302 " newb.wzy += ( bins[i+1].zyx *m1); \\\n"
1303 " bins[i] = newb;}\n"
1304 " for (int j=0; j<2; j++) \n"
1305 " { \n"
1306 " vec4 prev = bins[8]; \n"
1307 " bins[9] = bins[0]; \n";
1308
// body of the smoothing pass: a GLSL loop, or nine explicit macro calls
1309 if(GlobalUtil::_KeepShaderLoop)
1310 {
1311 out<<
1312 " for (int i=0; i<9; i++) \n"
1313 " { \n"
1314 " FILTER_CODE(i); \n"
1315 " } \n"
1316 " }";
1317
1318 }else
1319 {
1320 //manually unroll the loop for ATI.
1321 out <<
1322 " FILTER_CODE(0);\n"
1323 " FILTER_CODE(1);\n"
1324 " FILTER_CODE(2);\n"
1325 " FILTER_CODE(3);\n"
1326 " FILTER_CODE(4);\n"
1327 " FILTER_CODE(5);\n"
1328 " FILTER_CODE(6);\n"
1329 " FILTER_CODE(7);\n"
1330 " FILTER_CODE(8);\n"
1331 " }\n";
1332 }
1333 //find the maximum voting
// component-wise max over the nine vec4 bins, then reduced to one scalar that
// is broadcast into all lanes of maxh
1334 out<<"\n"
1335 " vec4 maxh; vec2 maxh2; \n"
1336 " vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
1337 " bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
1338 " maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";
1339
// GLSL fragments spliced into FINDPEAK below: testpeak_code opens the per-bin
// test, savepeak_code records an accepted peak (th, weight)
1340 std::string testpeak_code;
1341 std::string savepeak_code;
1342
1343 //save two/three/four orientations with the largest votings?
1344
1345 if(GlobalUtil::_MaxOrientation>1)
1346 {
1347 out<<"\n"
1348 " vec4 Orientations = vec4(0.0, 0.0, 0.0, 0.0); \n"
1349 " vec4 weights = vec4(0.0,0.0,0.0,0.0); ";
1350
// candidate lanes are those above the threshold vector hh
1351 testpeak_code = "\\\n"
1352 " {test = greaterThan(bins[i], hh);";
1353
1354 //save the orientations in weight-decreasing order
1355 if(GlobalUtil::_MaxOrientation ==2)
1356 {
1357 savepeak_code = "\\\n"
1358 " if(weight <=weights.g){}\\\n"
1359 " else if(weight >weights.r)\\\n"
1360 " {weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
1361 " else {weights.g = weight; Orientations.g = th;}";
1362 }else if(GlobalUtil::_MaxOrientation ==3)
1363 {
1364 savepeak_code = "\\\n"
1365 " if(weight <=weights.b){}\\\n"
1366 " else if(weight >weights.r)\\\n"
1367 " {weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
1368 " else if(weight >weights.g)\\\n"
1369 " {weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
1370 " else {weights.b = weight; Orientations.b = th;}";
1371 }else
1372 {
1373 savepeak_code = "\\\n"
1374 " if(weight <=weights.a){}\\\n"
1375 " else if(weight >weights.r)\\\n"
1376 " {weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
1377 " else if(weight >weights.g)\\\n"
1378 " {weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
1379 " else if(weight >weights.b)\\\n"
1380 " {weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
1381 " else {weights.a = weight; Orientations.a = th;}";
1382 }
1383
1384 }else
1385 {
// single-orientation mode: only lanes equal to the global maximum qualify, and
// the npeaks guard stops testing once one peak has been stored
1386 out<<"\n"
1387 " float Orientation; ";
1388 testpeak_code ="\\\n"
1389 " if(npeaks<=0.0){\\\n"
1390 " test = equal(bins[i], maxh) ;";
1391 savepeak_code="\\\n"
1392 " npeaks++; \\\n"
1393 " Orientation = th;";
1394
1395 }
1396 //find the peaks
// FINDPEAK(i, k): for each lane of bins[i] that passes `test` and is a local
// maximum against its two neighbours, compute a sub-bin offset `di` from the
// neighbouring values, form the bin position `th` (k is the index of the
// first bin held in bins[i]) and run savepeak_code. prevb carries bins[i].w
// to the next invocation as the left neighbour of the next lane x.
1397 out <<"\n"
1398 " #define FINDPEAK(i, k)" <<testpeak_code<<"\\\n"
1399 " if( any ( test) ) \\\n"
1400 " { \\\n"
1401 " if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y ) \\\n"
1402 " { \\\n"
1403 " float di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
1404 " float th = (k+di+0.5); float weight = bins[i].x;"
1405 <<savepeak_code<<"\\\n"
1406 " }\\\n"
1407 " else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) ) \\\n"
1408 " { \\\n"
1409 " float di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
1410 " float th = (k+di+1.5); float weight = bins[i].y; "
1411 <<savepeak_code<<" \\\n"
1412 " }\\\n"
1413 " if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) ) \\\n"
1414 " { \\\n"
1415 " float di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
1416 " float th = (k+di+2.5); float weight = bins[i].z; "
1417 <<savepeak_code<<" \\\n"
1418 " }\\\n"
1419 " else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x ) \\\n"
1420 " { \\\n"
1421 " float di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
1422 " float th = (k+di+3.5); float weight = bins[i].w; "
1423 <<savepeak_code<<" \\\n"
1424 " }\\\n"
1425 " }}\\\n"
1426 " prevb = bins[i].w;";
1427 //the following loop will be unrolled anyway in fp40,
1428 //taking more than 1000 instructions..
1429 //....
1430 if(GlobalUtil::_KeepShaderLoop)
1431 {
1432 out<<"\n"
1433 " vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test; \n"
1434 " bins[9] = bins[0]; \n"
1435 " float npeaks = 0.0, k = 0.0; \n"
1436 " float prevb = bins[8].w; \n"
1437 " for (int i = 0; i < 9; i++) \n"
1438 " {\n"
1439 " FINDPEAK(i, k);\n"
1440 " k = k + 4.0; \n"
1441 " }";
1442 }else
1443 {
1444 //loop unroll for ATI.
1445 out <<"\n"
1446 " vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
1447 " bins[9] = bins[0]; \n"
1448 " float npeaks = 0.0; \n"
1449 " float prevb = bins[8].w; \n"
1450 " FINDPEAK(0, 0.0);\n"
1451 " FINDPEAK(1, 4.0);\n"
1452 " FINDPEAK(2, 8.0);\n"
1453 " FINDPEAK(3, 12.0);\n"
1454 " FINDPEAK(4, 16.0);\n"
1455 " FINDPEAK(5, 20.0);\n"
1456 " FINDPEAK(6, 24.0);\n"
1457 " FINDPEAK(7, 28.0);\n"
1458 " FINDPEAK(8, 32.0);\n";
1459 }
1460 //WRITE output
// Bin positions are converted to angles with radians(x * 10.0).
// Multi-orientation + orientation_mode: gl_FragData[0] = (pos, npeaks, sigma)
// where npeaks counts the stored weights above hh (limited to the first
// _MaxOrientation lanes), and gl_FragData[1] holds all stored angles.
// Otherwise a single angle is written into gl_FragData[0].z.
1461 if(GlobalUtil::_MaxOrientation>1)
1462 {
1463 out<<"\n"
1464 " if(orientation_mode){\n"
1465 " npeaks = dot(vec4(1,1,"
1466 <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
1467 <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n"
1468 " gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
1469 " gl_FragData[1] = radians((Orientations )*10.0);\n"
1470 " }else{\n"
1471 " gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
1472 " }\n";
1473 }else
1474 {
1475 out<<"\n"
1476 " gl_FragData[0] = vec4(pos, radians((Orientation)*10.0), sigma);\n";
1477 }
1478 //end
// closes main(); the '\0' ends the C string later obtained through c_str()
1479 out<<"\n"
1480 "}\n"<<'\0';
1481
1482
1483 }
1484
SetSimpleOrientationInput(int oTex,float sigma,float sigma_step)1485 void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
1486 {
1487 glUniform1i(_param_orientation_gtex, 1);
1488 glUniform1f(_param_orientation_size, sigma);
1489 }
1490
1491
1492
1493
SetFeatureOrientationParam(int gtex,int width,int height,float sigma,int stex,float step)1494 void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
1495 {
1496 ///
1497 glUniform1i(_param_orientation_gtex, 1);
1498
1499 if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
1500 {
1501 //specify texutre for subpixel subscale localization
1502 glUniform1i(_param_orientation_stex, 2);
1503 }
1504
1505 float size[4];
1506 size[0] = (float)width;
1507 size[1] = (float)height;
1508 size[2] = sigma;
1509 size[3] = step;
1510 glUniform4fv(_param_orientation_size, 1, size);
1511 }
1512
1513
// Generates and compiles the descriptor fragment program. Each fragment
// accumulates eight orientation weights for one cell of the descriptor grid
// and writes them as two vec4 outputs (gl_FragData[0] = DA, gl_FragData[1] = DB).
// When ProgramGLSL::IsNative() reports success the program is stored in
// s_descriptor_fp and the locations of "gradTex", "size" and "dsize" are
// cached; otherwise the program is deleted and s_descriptor_fp is left untouched.
LoadDescriptorShaderF2()1514 void ShaderBagGLSL::LoadDescriptorShaderF2()
1515 {
1516 //one shader output 128/8 = 16 , each fragout encodes 4
1517 //const double twopi = 2.0*3.14159265358979323846;
1518 //const double rpi = 8.0/twopi;
1519 ostringstream out;
1520 out<<setprecision(8);
1521
// prologue: derive the feature index and the cell index `idx` from the fragment
// coordinate, fetch the feature (position, angle, sigma) from "tex" and emit
// zeros for features that lie on or outside the image border
1522 out<<"\n"
1523 "#define M_PI 3.14159265358979323846\n"
1524 "#define TWO_PI (2.0*M_PI)\n"
1525 "#define RPI 1.2732395447351626861510701069801\n"
1526 "#define WF size.z\n"
1527 "uniform sampler2DRect tex; \n"
1528 "uniform sampler2DRect gradTex; \n"
1529 "uniform vec4 dsize; \n"
1530 "uniform vec3 size; \n"
1531 "void main() \n"
1532 "{\n"
1533 " vec2 dim = size.xy; //image size \n"
1534 " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
1535 " float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5)); \n"
1536 " index = floor(index*0.125) + 0.49; \n"
1537 " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
1538 " vec2 pos = texture2DRect(tex, coord).xy; \n"
1539 " if(any(lessThanEqual(pos.xy, vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard; \n"
1540 " { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
1541 " float anglef = texture2DRect(tex, coord).z;\n"
1542 " if(anglef > M_PI) anglef -= TWO_PI;\n"
1543 " float sigma = texture2DRect(tex, coord).w; \n"
1544 " float spt = abs(sigma * WF); //default to be 3*sigma \n";
1545
1546 //rotation
1547 out<<
1548 " vec4 cscs, rots; \n"
1549 " cscs.y = sin(anglef); cscs.x = cos(anglef); \n"
1550 " cscs.zw = - cscs.xy; \n"
1551 " rots = cscs /spt; \n"
1552 " cscs *= spt; \n";
1553
1554 //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
1555 //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
1556 //divide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
1557 //To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
1558
1559 out<<
1560 "vec4 temp; vec2 pt, offsetpt; \n"
1561 " /*the fraction part of idx is .5*/ \n"
1562 " offsetpt.x = 4.0* fract(idx*0.25) - 2.0; \n"
1563 " offsetpt.y = floor(idx*0.25) - 1.5; \n"
1564 " temp = cscs.xwyx*offsetpt.xyxy; \n"
1565 " pt = pos + temp.xz + temp.yw; \n";
1566
1567 //get a horizontal bounding box of the rotated rectangle
1568 out<<
1569 " vec2 bwin = abs(cscs.xy); \n"
1570 " float bsz = bwin.x + bwin.y; \n"
1571 " vec4 sz; \n"
1572 " sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
1573 " sz.zw = min(pt + vec2(bsz), dim - vec2(2, 2)); \n"
1574 " sz = floor(sz)+0.5;"; //move sample point to pixel center
1575 //get voting for two box
1576
// accumulation loop: every sample whose rotated, normalized offset lies inside
// (-1, 1)^2 contributes its gradient magnitude, weighted bilinearly and by a
// Gaussian, to the two neighbouring orientation slots (theta1 and theta1 + 1,
// wrapping from 7 to 0); slots 0..3 live in DA and slots 4..7 in DB
1577 out<<"\n"
1578 " vec4 DA, DB; vec2 spos; \n"
1579 " DA = DB = vec4(0.0, 0.0, 0.0, 0.0); \n"
1580 " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1581 " { \n"
1582 " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1583 " { \n"
1584 " vec2 diff = spos - pt; \n"
1585 " temp = rots.xywx * diff.xyxy;\n"
1586 " vec2 nxy = (temp.xz + temp.yw); \n"
1587 " vec2 nxyn = abs(nxy); \n"
1588 " if(all( lessThan(nxyn, vec2(1.0)) ))\n"
1589 " {\n"
1590 " vec4 cc = texture2DRect(gradTex, spos); \n"
1591 " float mod = cc.b; float angle = cc.a; \n"
1592 " float theta0 = RPI * (anglef - angle); \n"
1593 " float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
1594 " diff = nxy + offsetpt.xy; \n"
1595 " float ww = exp(-0.125*dot(diff, diff));\n"
1596 " vec2 weights = vec2(1) - nxyn;\n"
1597 " float weight = weights.x * weights.y *mod*ww; \n"
1598 " float theta1 = floor(theta); \n"
1599 " float weight2 = (theta - theta1) * weight;\n"
1600 " float weight1 = weight - weight2;\n"
1601 " DA += vec4(equal(vec4(theta1), vec4(0, 1, 2, 3)))*weight1;\n"
1602 " DA += vec4(equal(vec4(theta1), vec4(7, 0, 1, 2)))*weight2; \n"
1603 " DB += vec4(equal(vec4(theta1), vec4(4, 5, 6, 7)))*weight1;\n"
1604 " DB += vec4(equal(vec4(theta1), vec4(3, 4, 5, 6)))*weight2; \n"
1605 " }\n"
1606 " }\n"
1607 " }\n";
1608
// epilogue: write both outputs, close main() and terminate the text with '\0'
1609 out<<
1610 " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
1611 "}\n"<<'\0';
1612
1613 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1614
1615 if(program->IsNative())
1616 {
1617 s_descriptor_fp = program ;
1618 _param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
1619 _param_descriptor_size = glGetUniformLocation(*program, "size");
1620 _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
1621 }else
1622 {
// program was not accepted: discard it
1623 delete program;
1624 }
1625
1626
1627 }
1628
LoadDescriptorShader()1629 void ShaderBagGLSL::LoadDescriptorShader()
1630 {
1631 GlobalUtil::_DescriptorPPT = 16;
1632 LoadDescriptorShaderF2();
1633 }
1634
1635
SetFeatureDescirptorParam(int gtex,int otex,float dwidth,float fwidth,float width,float height,float sigma)1636 void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
1637 {
1638 ///
1639 glUniform1i(_param_descriptor_gtex, 1);
1640
1641 float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
1642 glUniform4fv(_param_descriptor_dsize, 1, dsize);
1643 float size[3];
1644 size[0] = width;
1645 size[1] = height;
1646 size[2] = GlobalUtil::_DescriptorWindowFactor;
1647 glUniform3fv(_param_descriptor_size, 1, size);
1648
1649 }
1650
1651 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1652
// Creates the fixed fragment programs of the packed shader bag (s_gray,
// s_sampling, s_margin_copy, s_zero_pass, s_grad_pass), then loads the
// orientation and descriptor programs. When one of those two cannot be
// created, the matching GlobalUtil switches are cleared and a message is
// written to std::cerr.
LoadFixedShaders()1653 void ShaderBagPKSL::LoadFixedShaders()
1654 {
1655 ProgramGLSL * program;
1656
1657
// weighted RGB -> intensity, replicated into r, g and b with alpha 1
1658 s_gray = new ProgramGLSL(
1659 "uniform sampler2DRect tex; void main(){\n"
1660 "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
1661 "gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}" );
1662
1663
// packs the red channel of four samples (texture coordinates 0..3) into RGBA
1664 s_sampling = new ProgramGLSL(
1665 "uniform sampler2DRect tex; void main(){\n"
1666 "gl_FragColor= vec4( texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
1667 " texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}" );
1668
1669
// margin copy: reads the texel at the coordinate clamped to truncate.xy and
// replicates channels depending on which axis is within truncate.xy and on
// truncate.z / truncate.w
1670 s_margin_copy = program = new ProgramGLSL(
1671 "uniform sampler2DRect tex; uniform vec4 truncate; void main(){\n"
1672 "vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
1673 "bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
1674 "if(ob.y) { gl_FragColor = (truncate.z ==0.0 ? cc.rrbb : cc.ggaa); } \n"
1675 "else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
1676 "else { vec4 weights = vec4(vec4(0.0, 1.0, 2.0, 3.0) == truncate.wwww);\n"
1677 "float v = dot(weights, cc); gl_FragColor = vec4(v);}}");
1678
1679 _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
1680
1681
1682
// writes zero to every channel
1683 s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
1684
1685
1686
// gradient pass with three outputs:
//   gl_FragData[0] = tex - texp at the same coordinate
//   gl_FragData[1] = gradient magnitude, 0.5 * sqrt(dx*dx + dy*dy)
//   gl_FragData[2] = atan(dy, dx + invalid); `invalid` is 1.0 where the
//                    magnitude is exactly zero, so atan never sees (0, 0)
1687 s_grad_pass = program = new ProgramGLSL(
1688 "uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
1689 "{\n"
1690 " vec4 v1, v2, gg;\n"
1691 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1692 " vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
1693 " gl_FragData[0] = cc - cp; \n"
1694 " vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
1695 " vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
1696 " vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
1697 " vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
1698 " vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
1699 " gl_FragData[1] = grad;\n"
1700 " vec4 invalid = vec4(equal(grad, vec4(0.0))); \n"
1701 " vec4 ov = atan(dy, dx + invalid); \n"
1702 " gl_FragData[2] = ov; \n"
1703 "}\n\0"); //when
1704
1705 _param_grad_pass_texp = glGetUniformLocation(*program, "texp");
1706
1707
// try the full orientation program first
// NOTE(review): the NULL test below assumes s_orientation was NULL before this
// call - confirm against the constructor
1708 GlobalUtil::_OrientationPack2 = 0;
1709 LoadOrientationShader();
1710
1711 if(s_orientation == NULL)
1712 {
1713 //Load a simplified version if the right version is not supported
// fallback: pick one channel of oTex at half the feature position (channel
// chosen by the fractional part of that coordinate) as the orientation and
// output sigma as size.x * pow(size.y, cc.a)
1714 s_orientation = program = new ProgramGLSL(
1715 "uniform sampler2DRect tex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
1716 " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1717 " vec2 co = cc.xy * 0.5; \n"
1718 " vec4 oo = texture2DRect(oTex, co);\n"
1719 " bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
1720 " float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
1721 " gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");
1722
1723 _param_orientation_gtex= glGetUniformLocation(*program, "oTex");
1724 _param_orientation_size= glGetUniformLocation(*program, "size");
1725 GlobalUtil::_MaxOrientation = 0;
1726 GlobalUtil::_FullSupported = 0;
1727 std::cerr<<"Orientation simplified on this hardware"<<endl;
1728 }
1729
// descriptor program is only attempted when descriptors are enabled
1730 if(GlobalUtil::_DescriptorPPT)
1731 {
1732 LoadDescriptorShader();
1733 if(s_descriptor_fp == NULL)
1734 {
1735 GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;
1736 std::cerr<<"Descriptor ignored on this hardware"<<endl;
1737 }
1738 }
1739 }
1740
1741
// Creates the fragment programs used for visualization in the packed shader
// bag: s_copy_key, s_vertex_list, s_display_gaussian, s_display_dog,
// s_display_grad and s_display_keys. Also caches the location of the "sizes"
// uniform of s_vertex_list in _param_genvbo_size.
// The display programs select one channel of the fetched texel from the
// fractional part of the texture coordinate (ff.x / ff.y).
LoadDisplayShaders()1742 void ShaderBagPKSL::LoadDisplayShaders()
1743 {
1744 ProgramGLSL * program;
1745
// copies the first two channels of the feature texel; z = 0, w = 1
1746 s_copy_key = new ProgramGLSL(
1747 "uniform sampler2DRect tex;void main(){\n"
1748 "gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");
1749
1750 //shader used to write a vertex buffer object
1751 //which is used to draw the quads of each feature
// sizes.y / sizes.z / sizes.w are read as fwidth / twidth / rwidth; the
// fractional part of px selects which vertex offset is emitted, rotated by
// the feature angle cc.b and scaled by 3 * cc.a; features with a negative
// coordinate are passed through unchanged
1752 s_vertex_list = program = new ProgramGLSL(
1753 "uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
1754 "float fwidth = sizes.y; \n"
1755 "float twidth = sizes.z; \n"
1756 "float rwidth = sizes.w; \n"
1757 "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
1758 "float px = mod(index, twidth);\n"
1759 "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
1760 "vec4 cc = texture2DRect(tex, tpos );\n"
1761 "float size = 3.0 * cc.a; \n"
1762 "gl_FragColor.zw = vec2(0.0, 1.0);\n"
1763 "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
1764 "{\n"
1765 " float type = fract(px);\n"
1766 " vec2 dxy; float s, c;\n"
1767 " dxy.x = type < 0.1 ? 0.0 : (((type <0.5) || (type > 0.9))? size : -size);\n"
1768 " dxy.y = type < 0.2 ? 0.0 : (((type < 0.3) || (type > 0.7) )? -size :size); \n"
1769 " s = sin(cc.b); c = cos(cc.b); \n"
1770 " gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
1771 " gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
1772 "}\n\0");
1773 /*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/
1774
1775 _param_genvbo_size = glGetUniformLocation(*program, "sizes");
1776
// shows the selected channel as a gray value
1777 s_display_gaussian = new ProgramGLSL(
1778 "uniform sampler2DRect tex; void main(){\n"
1779 "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1780 "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");
1781
// shows the selected channel remapped to 0.5 + 20 * v
1782 s_display_dog = new ProgramGLSL(
1783 "uniform sampler2DRect tex; void main(){\n"
1784 "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1785 "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
1786 "gl_FragColor = vec4(g, g, g, 1.0);}" );
1787
1788
// shows the selected channel scaled by 5
1789 s_display_grad = new ProgramGLSL(
1790 "uniform sampler2DRect tex; void main(){\n"
1791 "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1792 "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");
1793
// |oc.r| in {1, 2, 3, 4} marks which sub-position holds a keypoint; fragments
// without one are discarded, positive oc.r is drawn red and the rest green
1794 s_display_keys= new ProgramGLSL(
1795 "uniform sampler2DRect tex; void main(){\n"
1796 "vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1797 "vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1798 "bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"
1799 "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
1800 "if(v == 0.0) discard; \n"
1801 "else if(oc.r > 0.0) gl_FragColor = vec4(1.0, 0.0, 0,1.0); \n"
1802 "else gl_FragColor = vec4(0.0,1.0,0.0,1.0); }" );
1803 }
1804
// Generates and compiles the orientation fragment program of the packed
// shader bag. When ProgramGLSL::IsNative() reports success the program is
// stored in s_orientation and the locations of "gtex", "otex" and "size" are
// cached; otherwise the program is deleted and s_orientation is left untouched.
//
// Differences from the unpacked generator that are visible in this code:
//   - gradient magnitude and orientation come from two samplers (gtex, otex),
//     and every fetch yields four values that are processed lane by lane;
//   - sample coordinates are halved (sz = floor(sz*0.5) + 0.5) and the four
//     lane offsets are offset and offset + 1 in x and y
//     (presumably one texel packs a 2x2 pixel block - confirm);
//   - sigma in orientation mode is abs(size.z) * pow(size.w, sift.w) * sift.z.
// The histogram smoothing / peak search is shared through
// ShaderBagGLSL::WriteOrientationCodeToStream.
LoadOrientationShader(void)1805 void ShaderBagPKSL::LoadOrientationShader(void)
1806 {
1807 ostringstream out;
// NVIDIA-specific compiler hints
1808 if(GlobalUtil::_IsNvidia)
1809 {
1810 out << "#pragma optionNV(ifcvt none)\n"
1811 "#pragma optionNV(unroll all)\n";
1812 }
// defines, uniforms, histogram initialization and sampling-window setup
1813 out<<"\n"
1814 "#define GAUSSIAN_WF float("<<GlobalUtil::_OrientationGaussianFactor<<") \n"
1815 "#define SAMPLE_WF float("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1816 "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1817 "uniform sampler2DRect tex; uniform sampler2DRect gtex;\n"
1818 "uniform sampler2DRect otex; uniform vec4 size;\n"
1819 "void main() \n"
1820 "{ \n"
1821 " vec4 bins[10]; \n"
1822 " bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"
1823 " bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"
1824 " bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"
1825 " vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1826 " vec2 pos = sift.xy; \n"
1827 " bool orientation_mode = (size.z != 0.0); \n"
1828 " float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
1829 " //bool fixed_orientation = (size.z < 0.0); \n"
1830 " if(size.z < 0.0) {gl_FragData[0] = vec4(pos, 0.0, sigma); return;}"
1831 " float gsigma = sigma * GAUSSIAN_WF; \n"
1832 " vec2 win = abs(vec2(sigma * (SAMPLE_WF * GAUSSIAN_WF))); \n"
1833 " vec2 dim = size.xy; \n"
1834 " vec4 dist_threshold = vec4(win.x*win.x+0.5); \n"
1835 " float factor = -0.5/(gsigma*gsigma); \n"
1836 " vec4 sz; vec2 spos; \n"
1837 " //if(any(pos.xy <= float(1))) discard; \n"
1838 " sz.xy = max( pos - win, vec2(2.0,2.0)); \n"
1839 " sz.zw = min( pos + win, dim-vec2(3.0)); \n"
1840 " sz = floor(sz*0.5) + 0.5; ";
1841 //loop to get the histogram
1842
// per fetched texel: squared distances of the four lanes to the feature, a
// lane mask `inside`, Gaussian-weighted magnitudes and the 10-degree bin
// index of each lane (negative indices wrapped by +36)
1843 out<<"\n"
1844 " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
1845 " { \n"
1846 " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
1847 " { \n"
1848 " vec2 offset = 2.0 * spos - pos - vec2(0.5); \n"
1849 " vec4 off = vec4(offset, offset + vec2(1)); \n"
1850 " vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww; \n"
1851 " bvec4 inside = lessThan(distsq, dist_threshold); \n"
1852 " if(any(inside)) \n"
1853 " { \n"
1854 " vec4 gg = texture2DRect(gtex, spos); \n"
1855 " vec4 oo = texture2DRect(otex, spos); \n"
1856 " vec4 weight = gg * exp(distsq * factor); \n"
1857 " vec4 idxv = floor(degrees(oo)*0.1); \n"
1858 " idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0); \n"
1859 " vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0); \n";
1860 //
// accumulate each inside lane into bins[idx / 4]: direct array indexing or an
// explicit comparison tree, depending on _UseDynamicIndexing
1861 if(GlobalUtil::_UseDynamicIndexing)
1862 {
1863 // it might be slow on some GPUs
1864 out<<"\n"
1865 " for(int i = 0 ; i < 4; i++)\n"
1866 " {\n"
1867 " if(inside[i])\n"
1868 " {\n"
1869 " float idx = idxv[i]; \n"
1870 " vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0.0,1.0,2.0,3.0))); \n"
1871 " int iidx = int(floor(idx*0.25)); \n"
1872 " bins[iidx]+=inc; \n"
1873 " } \n"
1874 " } \n"
1875 " } \n"
1876 " } \n"
1877 " }";
1878
1879 }else
1880 {
1881 //nvfp40 still does not support dynamic array indexing
1882 //unrolled binary search
1883 //it seems to be faster than the dynamic indexing version on some GPUs
1884 out<<"\n"
1885 " for(int i = 0 ; i < 4; i++)\n"
1886 " {\n"
1887 " if(inside[i])\n"
1888 " {\n"
1889 " float idx = idxv[i]; \n"
1890 " vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3))); \n"
1891 " if(idx < 16.0) \n"
1892 " { \n"
1893 " if(idx < 8.0) \n"
1894 " { \n"
1895 " if(idx < 4.0) { bins[0]+=inc;} \n"
1896 " else { bins[1]+=inc;} \n"
1897 " }else \n"
1898 " { \n"
1899 " if(idx < 12.0){ bins[2]+=inc;} \n"
1900 " else { bins[3]+=inc;} \n"
1901 " } \n"
1902 " }else if(idx < 32.0) \n"
1903 " { \n"
1904 " if(idx < 24.0) \n"
1905 " { \n"
1906 " if(idx <20.0) { bins[4]+=inc;} \n"
1907 " else { bins[5]+=inc;} \n"
1908 " }else \n"
1909 " { \n"
1910 " if(idx < 28.0){ bins[6]+=inc;} \n"
1911 " else { bins[7]+=inc;} \n"
1912 " } \n"
1913 " }else \n"
1914 " { \n"
1915 " bins[8]+=inc; \n"
1916 " } \n"
1917 " } \n"
1918 " } \n"
1919 " } \n"
1920 " } \n"
1921 " }";
1922
1923 }
1924
1925 //reuse the code from the unpacked version..
1926 ShaderBagGLSL::WriteOrientationCodeToStream(out);
1927
1928
1929
1930 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
1931 if(program->IsNative())
1932 {
1933 s_orientation = program ;
1934 _param_orientation_gtex = glGetUniformLocation(*program, "gtex");
1935 _param_orientation_otex = glGetUniformLocation(*program, "otex");
1936 _param_orientation_size = glGetUniformLocation(*program, "size");
1937 }else
1938 {
// program was not accepted: discard it
1939 delete program;
1940 }
1941 }
1942
SetGenListStartParam(float width,int tex0)1943 void ShaderBagPKSL::SetGenListStartParam(float width, int tex0)
1944 {
1945 glUniform1f(_param_ftex_width, width);
1946 glUniform1i(_param_genlist_start_tex0, 0);
1947 }
1948
LoadGenListShader(int ndoglev,int nlev)1949 void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev)
1950 {
1951 ProgramGLSL * program;
1952
1953 s_genlist_init_tight = new ProgramGLSL(
1954 "uniform sampler2DRect tex; void main ()\n"
1955 "{\n"
1956 " vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"
1957 " texture2DRect(tex, gl_TexCoord[1].xy).r, \n"
1958 " texture2DRect(tex, gl_TexCoord[2].xy).r, \n"
1959 " texture2DRect(tex, gl_TexCoord[3].xy).r); \n"
1960 " gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"
1961 "}");
1962
1963 s_genlist_init_ex = program = new ProgramGLSL(
1964 "uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"
1965 "{\n"
1966 " vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1967 " vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1968 " vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1969 " vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1970 " vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"
1971 " vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"
1972 " vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"
1973 " vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"
1974 " helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"
1975 " helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"
1976 " helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"
1977 " helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"
1978 " gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0)))); \n"
1979 " gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0)))); \n"
1980 " gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0)))); \n"
1981 " gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0)))); \n"
1982 "}");
1983 _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1984
1985 s_genlist_end = program = new ProgramGLSL(
1986 GlobalUtil::_KeepExtremumSign == 0 ?
1987
1988 "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
1989 "{\n"
1990 " vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
1991 " vec2 pos = tc.rg; float index = tc.b;\n"
1992 " vec4 tk = texture2DRect( ktex, pos); \n"
1993 " vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1994 " vec2 opos; \n"
1995 " opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
1996 " opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
1997 " gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"
1998 "}" :
1999
2000 "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
2001 "{\n"
2002 " vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
2003 " vec2 pos = tc.rg; float index = tc.b;\n"
2004 " vec4 tk = texture2DRect( ktex, pos); \n"
2005 " vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))) \n"
2006 " vec2 opos; \n"
2007 " opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
2008 " opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
2009 " gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"
2010 "}"
2011 );
2012
2013 _param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");
2014
2015 //reduction ...
2016 s_genlist_histo = new ProgramGLSL(
2017 "uniform sampler2DRect tex; void main ()\n"
2018 "{\n"
2019 " vec4 helper; vec4 helper2; \n"
2020 " helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
2021 " helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
2022 " gl_FragColor.rg = helper2.xz + helper2.yw;\n"
2023 " helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
2024 " helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
2025 " gl_FragColor.ba= helper2.xz+helper2.yw;\n"
2026 "}");
2027
2028
2029 //read of the first part, which generates tex coordinates
2030
2031 s_genlist_start= program = ShaderBagGLSL::LoadGenListStepShader(1, 1);
2032 _param_ftex_width= glGetUniformLocation(*program, "width");
2033 _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
2034 //stepping
2035 s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1);
2036 _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
2037
2038 }
UnloadProgram(void)2039 void ShaderBagPKSL::UnloadProgram(void)
2040 {
2041 glUseProgram(0);
2042 }
LoadKeypointShader(float dog_threshold,float edge_threshold)2043 void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold)
2044 {
2045 float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
2046 float threshold1 = dog_threshold;
2047 float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
2048 ostringstream out;;
2049 out<<setprecision(8);
2050
2051 if(GlobalUtil::_IsNvidia)
2052 {
2053 out << "#pragma optionNV(ifcvt none)\n"
2054 "#pragma optionNV(unroll all)\n";
2055
2056 }
2057 if(GlobalUtil::_KeepShaderLoop)
2058 {
2059 out << "#define REPEAT4(FUNCTION)\\\n"
2060 "for(int i = 0; i < 4; ++i)\\\n"
2061 "{\\\n"
2062 " FUNCTION(i);\\\n"
2063 "}\n";
2064 }else
2065 {
2066 //loop unroll
2067 out << "#define REPEAT4(FUNCTION)\\\n"
2068 "FUNCTION(0);\\\n"
2069 "FUNCTION(1);\\\n"
2070 "FUNCTION(2);\\\n"
2071 "FUNCTION(3);\n";
2072 }
2073 //tex(X)(Y)
2074 //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
2075 //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
2076
2077 if(GlobalUtil::_DarknessAdaption)
2078 {
2079 out << "#define THRESHOLD0(i) (" << threshold0 << "* ii[i])\n"
2080 "#define THRESHOLD1 (" << threshold1 << "* ii[0])\n"
2081 "#define THRESHOLD2 " << threshold2 << "\n"
2082 "#define DEFINE_EXTRA() vec4 ii = texture2DRect(texI, gl_TexCoord[0].xy); "
2083 "ii = min(2.0 * ii + 0.1, 1.0) \n"
2084 "#define MOVE_EXTRA(idx) ii[0] = ii[idx]\n";
2085 out << "uniform sampler2DRect texI;\n";
2086 }else
2087 {
2088 out << "#define THRESHOLD0(i) " << threshold0 << "\n"
2089 "#define THRESHOLD1 " << threshold1 << "\n"
2090 "#define THRESHOLD2 " << threshold2 << "\n"
2091 "#define DEFINE_EXTRA()\n"
2092 "#define MOVE_EXTRA(idx) \n" ;
2093 }
2094
2095 out<<
2096 "uniform sampler2DRect tex; uniform sampler2DRect texU;\n"
2097 "uniform sampler2DRect texD; void main ()\n"
2098 "{\n"
2099 " vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
2100 " vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
2101 " vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"
2102 " vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"
2103 " vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"
2104 " vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
2105 " vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"
2106 " vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"
2107 " vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"
2108 " vec4 cru = texture2DRect(tex, TexRU.xy);\n"
2109 " vec4 cc = ccc;\n"
2110 " vec4 v1[4], v2[4];\n"
2111 " v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2112 " v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2113 " v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2114 " v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2115 " v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2116 " v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2117 " v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2118 " v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2119 " DEFINE_EXTRA();\n";
2120
2121 //test against 8 neighbours
2122 //use variable to identify type of extremum
2123 //1.0 for local maximum and -1.0 for minimum
2124 out <<
2125 " vec4 key = vec4(0.0); \n"
2126 " #define KEYTEST_STEP0(i) \\\n"
2127 " {\\\n"
2128 " bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"
2129 " key[i] = cc[i] > float(THRESHOLD0(i)) && all(test1)?1.0: 0.0;\\\n"
2130 " key[i] = cc[i] < float(-THRESHOLD0(i)) && all(test2)? -1.0: key[i];\\\n"
2131 " }\n"
2132 " REPEAT4(KEYTEST_STEP0);\n"
2133 " if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"
2134 " if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"
2135 " gl_FragColor = vec4(0.0);\n"
2136 " if(any(notEqual(key, vec4(0.0)))) {\n";
2137
2138 //do edge supression first..
2139 //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
2140 //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
2141
2142 out<<
2143 " float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
2144 " #define EDGE_SUPPRESION(i) \\\n"
2145 " if(key[i] != 0.0)\\\n"
2146 " {\\\n"
2147 " vec4 D2 = v1[i].xyzw - cc[i];\\\n"
2148 " vec2 D4 = v2[i].xw - v2[i].yz;\\\n"
2149 " vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \\\n"
2150 " fx[i] = D5.x; fy[i] = D5.y ;\\\n"
2151 " fxx[i] = D2.x + D2.y;\\\n"
2152 " fyy[i] = D2.z + D2.w;\\\n"
2153 " fxy[i] = 0.25*(D4.x + D4.y);\\\n"
2154 " float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"
2155 " float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"
2156 " float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\\\n"
2157 " if( score_down <= 0.0 || score_up > THRESHOLD2 * score_down)key[i] = 0.0;\\\n"
2158 " }\n"
2159 " REPEAT4(EDGE_SUPPRESION);\n"
2160 " if(any(notEqual(key, vec4(0.0)))) {\n";
2161
2162 ////////////////////////////////////////////////
2163 //read 9 pixels of upper/lower level
2164 out<<
2165 " vec4 v4[4], v5[4], v6[4];\n"
2166 " ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"
2167 " clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"
2168 " crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"
2169 " ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"
2170 " ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"
2171 " cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"
2172 " clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"
2173 " crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"
2174 " cru = texture2DRect(texU, TexRU.xy);\n"
2175 " vec4 cu = ccc;\n"
2176 " v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2177 " v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2178 " v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2179 " v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2180 " v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2181 " v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2182 " v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2183 " v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2184 <<
2185 " #define KEYTEST_STEP1(i)\\\n"
2186 " if(key[i] == 1.0)\\\n"
2187 " {\\\n"
2188 " bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"
2189 " if(cc[i] < cu[i] || any(test))key[i] = 0.0; \\\n"
2190 " }else if(key[i] == -1.0)\\\n"
2191 " {\\\n"
2192 " bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"
2193 " if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \\\n"
2194 " }\n"
2195 " REPEAT4(KEYTEST_STEP1);\n"
2196 " if(any(notEqual(key, vec4(0.0)))) { \n"
2197 <<
2198 " ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"
2199 " clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"
2200 " crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"
2201 " ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"
2202 " ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"
2203 " cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"
2204 " clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"
2205 " crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"
2206 " cru = texture2DRect(texD, TexRU.xy);\n"
2207 " vec4 cd = ccc;\n"
2208 " v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2209 " v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2210 " v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2211 " v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2212 " v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2213 " v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2214 " v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2215 " v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2216 <<
2217 " #define KEYTEST_STEP2(i)\\\n"
2218 " if(key[i] == 1.0)\\\n"
2219 " {\\\n"
2220 " bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"
2221 " if(cc[i] < cd[i] || any(test))key[i] = 0.0; \\\n"
2222 " }else if(key[i] == -1.0)\\\n"
2223 " {\\\n"
2224 " bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"
2225 " if(cc[i] > cd[i] || any(test))key[i] = 0.0; \\\n"
2226 " }\n"
2227 " REPEAT4(KEYTEST_STEP2);\n"
2228 " float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"
2229 " //assume there is only one keypoint in the four. \n"
2230 " if(keysum==1.0) {\n";
2231
2232 //////////////////////////////////////////////////////////////////////
2233 if(GlobalUtil::_SubpixelLocalization)
2234
2235 out <<
2236 " vec3 offset = vec3(0.0, 0.0, 0.0); \n"
2237 " #define TESTMOVE_KEYPOINT(idx) \\\n"
2238 " if(key[idx] != 0.0) \\\n"
2239 " {\\\n"
2240 " cu[0] = cu[idx]; cd[0] = cd[idx]; cc[0] = cc[idx]; \\\n"
2241 " v4[0] = v4[idx]; v5[0] = v5[idx]; \\\n"
2242 " fxy[0] = fxy[idx]; fxx[0] = fxx[idx]; fyy[0] = fyy[idx]; \\\n"
2243 " fx[0] = fx[idx]; fy[0] = fy[idx]; MOVE_EXTRA(idx); \\\n"
2244 " }\n"
2245 " TESTMOVE_KEYPOINT(1);\n"
2246 " TESTMOVE_KEYPOINT(2);\n"
2247 " TESTMOVE_KEYPOINT(3);\n"
2248 <<
2249
2250 " float fs = 0.5*( cu[0] - cd[0] ); \n"
2251 " float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
2252 " float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
2253 " float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
2254 " vec4 A0, A1, A2 ; \n"
2255 " A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]); \n"
2256 " A1 = vec4(fxy[0], fyy[0], fys, -fy[0]); \n"
2257 " A2 = vec4(fxs, fys, fss, -fs); \n"
2258 " vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs)); \n"
2259 " float maxa = max(max(x3.x, x3.y), x3.z); \n"
2260 " if(maxa >= 1e-10 ) \n"
2261 " { \n"
2262 " if(x3.y ==maxa ) \n"
2263 " { \n"
2264 " vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
2265 " }else if( x3.z == maxa ) \n"
2266 " { \n"
2267 " vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
2268 " } \n"
2269 " A0 /= A0.x; \n"
2270 " A1 -= A1.x * A0; \n"
2271 " A2 -= A2.x * A0; \n"
2272 " vec2 x2 = abs(vec2(A1.y, A2.y)); \n"
2273 " if( x2.y > x2.x ) \n"
2274 " { \n"
2275 " vec3 TEMP = A2.yzw; \n"
2276 " A2.yzw = A1.yzw; \n"
2277 " A1.yzw = TEMP; \n"
2278 " x2.x = x2.y; \n"
2279 " } \n"
2280 " if(x2.x >= 1e-10) { \n"
2281 " A1.yzw /= A1.y; \n"
2282 " A2.yzw -= A2.y * A1.yzw; \n"
2283 " if(abs(A2.z) >= 1e-10) {\n"
2284 " offset.z = A2.w /A2.z; \n"
2285 " offset.y = A1.w - offset.z*A1.z; \n"
2286 " offset.x = A0.w - offset.z*A0.z - offset.y*A0.y; \n"
2287 " bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>float(THRESHOLD1)) ;\n"
2288 " if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"
2289 " }\n"
2290 " }\n"
2291 " }\n"
2292 <<"\n"
2293 " float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2294 " gl_FragColor = vec4(keyv, offset);\n"
2295 " }}}}\n"
2296 "}\n" <<'\0';
2297
2298 else out << "\n"
2299 " float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2300 " gl_FragColor = vec4(keyv, 0.0, 0.0, 0.0);\n"
2301 " }}}}\n"
2302 "}\n" <<'\0';
2303
2304 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2305 s_keypoint = program ;
2306
2307 //parameter
2308 _param_dog_texu = glGetUniformLocation(*program, "texU");
2309 _param_dog_texd = glGetUniformLocation(*program, "texD");
2310 if(GlobalUtil::_DarknessAdaption) _param_dog_texi = glGetUniformLocation(*program, "texI");
2311 }
SetDogTexParam(int texU,int texD)2312 void ShaderBagPKSL::SetDogTexParam(int texU, int texD)
2313 {
2314 glUniform1i(_param_dog_texu, 1);
2315 glUniform1i(_param_dog_texd, 2);
2316 if(GlobalUtil::_DarknessAdaption)glUniform1i(_param_dog_texi, 3);
2317 }
SetGenListStepParam(int tex,int tex0)2318 void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0)
2319 {
2320 glUniform1i(_param_genlist_step_tex0, 1);
2321 }
2322
SetGenVBOParam(float width,float fwidth,float size)2323 void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size)
2324 {
2325 float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
2326 glUniform4fv(_param_genvbo_size, 1, sizes);
2327 }
SetGradPassParam(int texP)2328 void ShaderBagPKSL::SetGradPassParam(int texP)
2329 {
2330 glUniform1i(_param_grad_pass_texp, 1);
2331 }
2332
LoadDescriptorShader()2333 void ShaderBagPKSL::LoadDescriptorShader()
2334 {
2335 GlobalUtil::_DescriptorPPT = 16;
2336 LoadDescriptorShaderF2();
2337 s_rect_description = LoadDescriptorProgramRECT();
2338 }
2339
LoadDescriptorProgramRECT()2340 ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramRECT()
2341 {
2342 //one shader outpout 128/8 = 16 , each fragout encodes 4
2343 //const double twopi = 2.0*3.14159265358979323846;
2344 //const double rpi = 8.0/twopi;
2345 ostringstream out;
2346 out<<setprecision(8);
2347 if(GlobalUtil::_KeepShaderLoop)
2348 {
2349 out << "#define REPEAT4(FUNCTION)\\\n"
2350 "for(int i = 0; i < 4; ++i)\\\n"
2351 "{\\\n"
2352 " FUNCTION(i);\\\n"
2353 "}\n";
2354 }else
2355 {
2356 //loop unroll for ATI
2357 out << "#define REPEAT4(FUNCTION)\\\n"
2358 "FUNCTION(0);\\\n"
2359 "FUNCTION(1);\\\n"
2360 "FUNCTION(2);\\\n"
2361 "FUNCTION(3);\n";
2362 }
2363
2364 out<<"\n"
2365 "#define M_PI 3.14159265358979323846\n"
2366 "#define TWO_PI (2.0*M_PI)\n"
2367 "#define RPI 1.2732395447351626861510701069801\n"
2368 "#define WF size.z\n"
2369 "uniform sampler2DRect tex; \n"
2370 "uniform sampler2DRect gtex; \n"
2371 "uniform sampler2DRect otex; \n"
2372 "uniform vec4 dsize; \n"
2373 "uniform vec3 size; \n"
2374 "void main() \n"
2375 "{\n"
2376 " vec2 dim = size.xy; //image size \n"
2377 " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2378 " float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"
2379 " index = floor(index*0.125)+ 0.49; \n"
2380 " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2381 " vec2 pos = texture2DRect(tex, coord).xy; \n"
2382 " vec2 wsz = texture2DRect(tex, coord).zw;\n"
2383 " float aspect_ratio = wsz.y / wsz.x;\n"
2384 " float aspect_sq = aspect_ratio * aspect_ratio; \n"
2385 " vec2 spt = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";
2386
2387 //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2388 //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
2389 //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2390 //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2391 out<<
2392 " vec4 temp; vec2 pt; \n"
2393 " pt.x = pos.x + fract(idx*0.25) * wsz.x; \n"
2394 " pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y; \n";
2395
2396 //get a horizontal bounding box of the rotated rectangle
2397 out<<
2398 " vec4 sz; \n"
2399 " sz.xy = max(pt - spt, vec2(2,2));\n"
2400 " sz.zw = min(pt + spt, dim - vec2(3)); \n"
2401 " sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2402 //get voting for two box
2403
2404 out<<"\n"
2405 " vec4 DA, DB; vec2 spos; \n"
2406 " DA = DB = vec4(0.0, 0.0, 0.0, 0.0); \n"
2407 " vec4 nox = vec4(0.0, 1.0, 0.0, 1.0); \n"
2408 " vec4 noy = vec4(0.0, 0.0, 1.0, 1.0); \n"
2409 " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
2410 " { \n"
2411 " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
2412 " { \n"
2413 " vec2 tpt = spos * 2.0 - pt - 0.5; \n"
2414 " vec4 nx = (tpt.x + nox) * ispt.x; \n"
2415 " vec4 ny = (tpt.y + noy) * ispt.y; \n"
2416 " vec4 nxn = abs(nx), nyn = abs(ny); \n"
2417 " bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"
2418 " if(any(inside))\n"
2419 " {\n"
2420 " vec4 gg = texture2DRect(gtex, spos);\n"
2421 " vec4 oo = texture2DRect(otex, spos);\n"
2422 //" vec4 cc = cos(oo), ss = sin(oo); \n"
2423 //" oo = atan(ss* aspect_ratio, cc); \n"
2424 //" gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "
2425 " vec4 theta0 = (- oo)*RPI;\n"
2426 " vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"
2427 " vec4 theta1 = floor(theta); \n"
2428 " vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg; \n"
2429 " vec4 weight2 = (theta - theta1) * weight; \n"
2430 " vec4 weight1 = weight - weight2; \n"
2431 " #define ADD_DESCRIPTOR(i) \\\n"
2432 " if(inside[i])\\\n"
2433 " {\\\n"
2434 " DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
2435 " DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
2436 " DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
2437 " DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
2438 " }\n"
2439 " REPEAT4(ADD_DESCRIPTOR);\n"
2440 " }\n"
2441 " }\n"
2442 " }\n";
2443 out<<
2444 " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2445 "}\n"<<'\0';
2446
2447 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2448 if(program->IsNative())
2449 {
2450 return program;
2451 }
2452 else
2453 {
2454 delete program;
2455 return NULL;
2456 }
2457 }
2458
LoadDescriptorProgramPKSL()2459 ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramPKSL()
2460 {
2461 //one shader outpout 128/8 = 16 , each fragout encodes 4
2462 //const double twopi = 2.0*3.14159265358979323846;
2463 //const double rpi = 8.0/twopi;
2464 ostringstream out;
2465 out<<setprecision(8);
2466
2467 if(GlobalUtil::_KeepShaderLoop)
2468 {
2469 out << "#define REPEAT4(FUNCTION)\\\n"
2470 "for(int i = 0; i < 4; ++i)\\\n"
2471 "{\\\n"
2472 " FUNCTION(i);\\\n"
2473 "}\n";
2474 }else
2475 {
2476 //loop unroll for ATI
2477 out << "#define REPEAT4(FUNCTION)\\\n"
2478 "FUNCTION(0);\\\n"
2479 "FUNCTION(1);\\\n"
2480 "FUNCTION(2);\\\n"
2481 "FUNCTION(3);\n";
2482 }
2483
2484 out<<"\n"
2485 "#define M_PI 3.14159265358979323846\n"
2486 "#define TWO_PI (2.0*M_PI)\n"
2487 "#define RPI 1.2732395447351626861510701069801\n"
2488 "#define WF size.z\n"
2489 "uniform sampler2DRect tex; \n"
2490 "uniform sampler2DRect gtex; \n"
2491 "uniform sampler2DRect otex; \n"
2492 "uniform vec4 dsize; \n"
2493 "uniform vec3 size; \n"
2494 "void main() \n"
2495 "{\n"
2496 " vec2 dim = size.xy; //image size \n"
2497 " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2498 " float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"
2499 " index = floor(index*0.125)+ 0.49; \n"
2500 " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2501 " vec2 pos = texture2DRect(tex, coord).xy; \n"
2502 " if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0))) "
2503 " //discard; \n"
2504 " { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
2505 " float anglef = texture2DRect(tex, coord).z;\n"
2506 " if(anglef > M_PI) anglef -= TWO_PI;\n"
2507 " float sigma = texture2DRect(tex, coord).w; \n"
2508 " float spt = abs(sigma * WF); //default to be 3*sigma \n";
2509 //rotation
2510 out<<
2511 " vec4 cscs, rots; \n"
2512 " cscs.x = cos(anglef); cscs.y = sin(anglef); \n"
2513 " cscs.zw = - cscs.xy; \n"
2514 " rots = cscs /spt; \n"
2515 " cscs *= spt; \n";
2516
2517 //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2518 //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
2519 //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2520 //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2521 out<<
2522 " vec4 temp; vec2 pt, offsetpt; \n"
2523 " /*the fraction part of idx is .5*/ \n"
2524 " offsetpt.x = 4.0* fract(idx*0.25) - 2.0; \n"
2525 " offsetpt.y = floor(idx*0.25) - 1.5; \n"
2526 " temp = cscs.xwyx*offsetpt.xyxy; \n"
2527 " pt = pos + temp.xz + temp.yw; \n";
2528
2529 //get a horizontal bounding box of the rotated rectangle
2530 out<<
2531 " vec2 bwin = abs(cscs.xy); \n"
2532 " float bsz = bwin.x + bwin.y; \n"
2533 " vec4 sz; \n"
2534 " sz.xy = max(pt - vec2(bsz), vec2(2,2));\n"
2535 " sz.zw = min(pt + vec2(bsz), dim - vec2(3)); \n"
2536 " sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2537 //get voting for two box
2538
2539 out<<"\n"
2540 " vec4 DA, DB; vec2 spos; \n"
2541 " DA = DB = vec4(0.0, 0.0, 0.0, 0.0); \n"
2542 " vec4 nox = vec4(0.0, rots.xy, rots.x + rots.y); \n"
2543 " vec4 noy = vec4(0.0, rots.wx, rots.w + rots.x); \n"
2544 " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
2545 " { \n"
2546 " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
2547 " { \n"
2548 " vec2 tpt = spos * 2.0 - pt - 0.5; \n"
2549 " vec4 temp = rots.xywx * tpt.xyxy; \n"
2550 " vec2 temp2 = temp.xz + temp.yw; \n"
2551 " vec4 nx = temp2.x + nox; \n"
2552 " vec4 ny = temp2.y + noy; \n"
2553 " vec4 nxn = abs(nx), nyn = abs(ny); \n"
2554 " bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"
2555 " if(any(inside))\n"
2556 " {\n"
2557 " vec4 gg = texture2DRect(gtex, spos);\n"
2558 " vec4 oo = texture2DRect(otex, spos);\n"
2559 " vec4 theta0 = (anglef - oo)*RPI;\n"
2560 " vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"
2561 " vec4 theta1 = floor(theta); \n"
2562 " vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y; \n"
2563 " vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy )); \n"
2564 " vec4 weight = (vec4(1) - nxn) * (vec4(1) - nyn) * gg * ww; \n"
2565 " vec4 weight2 = (theta - theta1) * weight; \n"
2566 " vec4 weight1 = weight - weight2; \n"
2567 " #define ADD_DESCRIPTOR(i) \\\n"
2568 " if(inside[i])\\\n"
2569 " {\\\n"
2570 " DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \\\n"
2571 " DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \\\n"
2572 " DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \\\n"
2573 " DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \\\n"
2574 " }\n"
2575 " REPEAT4(ADD_DESCRIPTOR);\n"
2576 " }\n"
2577 " }\n"
2578 " }\n";
2579 out<<
2580 " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2581 "}\n"<<'\0';
2582
2583 ProgramGLSL * program = new ProgramGLSL(out.str().c_str());
2584 if(program->IsNative())
2585 {
2586 return program;
2587 }
2588 else
2589 {
2590 delete program;
2591 return NULL;
2592 }
2593 }
2594
LoadDescriptorShaderF2()2595 void ShaderBagPKSL::LoadDescriptorShaderF2()
2596 {
2597
2598 ProgramGLSL * program = LoadDescriptorProgramPKSL();
2599 if( program )
2600 {
2601 s_descriptor_fp = program;
2602 _param_descriptor_gtex = glGetUniformLocation(*program, "gtex");
2603 _param_descriptor_otex = glGetUniformLocation(*program, "otex");
2604 _param_descriptor_size = glGetUniformLocation(*program, "size");
2605 _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
2606 }
2607 }
2608
2609
2610
SetSimpleOrientationInput(int oTex,float sigma,float sigma_step)2611 void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
2612 {
2613 glUniform1i(_param_orientation_gtex, 1);
2614 glUniform2f(_param_orientation_size, sigma, sigma_step);
2615 }
2616
2617
SetFeatureOrientationParam(int gtex,int width,int height,float sigma,int otex,float step)2618 void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
2619 {
2620 ///
2621 glUniform1i(_param_orientation_gtex, 1);
2622 glUniform1i(_param_orientation_otex, 2);
2623
2624 float size[4];
2625 size[0] = (float)width;
2626 size[1] = (float)height;
2627 size[2] = sigma;
2628 size[3] = step;
2629 glUniform4fv(_param_orientation_size, 1, size);
2630 }
2631
SetFeatureDescirptorParam(int gtex,int otex,float dwidth,float fwidth,float width,float height,float sigma)2632 void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma)
2633 {
2634 if(sigma == 0 && s_rect_description)
2635 {
2636 //rectangle description mode
2637 s_rect_description->UseProgram();
2638 GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");
2639 GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");
2640 GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");
2641 GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");
2642 ///
2643 glUniform1i(param_descriptor_gtex, 1);
2644 glUniform1i(param_descriptor_otex, 2);
2645
2646 float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2647 glUniform4fv(param_descriptor_dsize, 1, dsize);
2648 float size[3];
2649 size[0] = width;
2650 size[1] = height;
2651 size[2] = GlobalUtil::_DescriptorWindowFactor;
2652 glUniform3fv(param_descriptor_size, 1, size);
2653 }else
2654 {
2655 ///
2656 glUniform1i(_param_descriptor_gtex, 1);
2657 glUniform1i(_param_descriptor_otex, 2);
2658
2659
2660 float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2661 glUniform4fv(_param_descriptor_dsize, 1, dsize);
2662 float size[3];
2663 size[0] = width;
2664 size[1] = height;
2665 size[2] = GlobalUtil::_DescriptorWindowFactor;
2666 glUniform3fv(_param_descriptor_size, 1, size);
2667 }
2668
2669 }
2670
2671
SetGenListEndParam(int ktex)2672 void ShaderBagPKSL::SetGenListEndParam(int ktex)
2673 {
2674 glUniform1i(_param_genlist_end_ktex, 1);
2675 }
SetGenListInitParam(int w,int h)2676 void ShaderBagPKSL::SetGenListInitParam(int w, int h)
2677 {
2678 float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f, (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
2679 glUniform4fv(_param_genlist_init_bbox, 1, bbox);
2680 }
2681
SetMarginCopyParam(int xmax,int ymax)2682 void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax)
2683 {
2684 float truncate[4];
2685 truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1) >> 1) - 0.5f;
2686 truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1) >> 1) - 0.5f;
2687 truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
2688 truncate[3] = truncate[2] + (((ymax % 2) == 1)? 0.0f : 2.0f);
2689 glUniform4fv(_param_margin_copy_truncate, 1, truncate);
2690 }
2691