1 ////////////////////////////////////////////////////////////////////////////
2 //	File:		SiftMatch.cpp
3 //	Author:		Changchang Wu
4 //	Description :	implementation of SiftMatchGPU and SiftMatchGL
5 //
6 //
7 //	Copyright (c) 2007 University of North Carolina at Chapel Hill
8 //	All Rights Reserved
9 //
10 //	Permission to use, copy, modify and distribute this software and its
11 //	documentation for educational, research and non-profit purposes, without
12 //	fee, and without a written agreement is hereby granted, provided that the
13 //	above copyright notice and the following paragraph appear in all copies.
14 //
15 //	The University of North Carolina at Chapel Hill make no representations
16 //	about the suitability of this software for any purpose. It is provided
17 //	'as is' without express or implied warranty.
18 //
19 //	Please send BUG REPORTS to ccwu@cs.unc.edu
20 //
21 ////////////////////////////////////////////////////////////////////////////
22 
23 #include "GL/glew.h"
24 #include <iostream>
25 #include <iomanip>
26 #include <vector>
27 #include <sstream>
28 #include <algorithm>
29 using namespace std;
30 #include <string.h>
31 #include "GlobalUtil.h"
32 
33 #include "ProgramGLSL.h"
34 #include "GLTexImage.h"
35 #include "SiftGPU.h"
36 #include "SiftMatch.h"
37 #include "FrameBufferObject.h"
38 
39 #if defined(CUDA_SIFTGPU_ENABLED)
40 #include "CuTexImage.h"
41 #include "SiftMatchCU.h"
42 #endif
43 
44 
SiftMatchGL(int max_sift,int use_glsl)45 SiftMatchGL::SiftMatchGL(int max_sift, int use_glsl): SiftMatchGPU()
46 {
47 	s_multiply = s_col_max = s_row_max = s_guided_mult = NULL;
48 	_num_sift[0] = _num_sift[1] = 0;
49 	_id_sift[0] = _id_sift[1] = 0;
50 	_have_loc[0] = _have_loc[1] = 0;
51 	__max_sift = max_sift <=0 ? 4096 : ((max_sift + 31)/ 32 * 32) ;
52 	_pixel_per_sift = 32; //must be 32
53 	_sift_num_stripe = 1;
54 	_sift_per_stripe = 1;
55 	_sift_per_row = _sift_per_stripe * _sift_num_stripe;
56 	_initialized = 0;
57 }
58 
~SiftMatchGL()59 SiftMatchGL::~SiftMatchGL()
60 {
61 	if(s_multiply) delete s_multiply;
62 	if(s_guided_mult) delete s_guided_mult;
63 	if(s_col_max) delete s_col_max;
64 	if(s_row_max) delete s_row_max;
65 }
66 
Allocate(int max_sift,int mbm)67 bool SiftMatchGL::Allocate(int max_sift, int mbm) {
68   SetMaxSift(max_sift);
69   return glGetError() == GL_NO_ERROR;
70 }
71 
SetMaxSift(int max_sift)72 void SiftMatchGL::SetMaxSift(int max_sift)
73 {
74 
75 	max_sift = ((max_sift + 31)/32)*32;
76 	if(max_sift > GlobalUtil::_texMaxDimGL) max_sift = GlobalUtil::_texMaxDimGL;
77 	if(max_sift > __max_sift)
78 	{
79 		__max_sift = max_sift;
80 		AllocateSiftMatch();
81 		_have_loc[0] = _have_loc[1] = 0;
82 		_id_sift[0] = _id_sift[1] = -1;
83 		_num_sift[0] = _num_sift[1] = 1;
84 	}else
85 	{
86 		__max_sift = max_sift;
87 	}
88 }
89 
AllocateSiftMatch()90 void SiftMatchGL::AllocateSiftMatch()
91 {
92 	//parameters, number of sift is limited by the texture size
93 	if(__max_sift > GlobalUtil::_texMaxDimGL) __max_sift = GlobalUtil::_texMaxDimGL;
94 	///
95 	int h = __max_sift / _sift_per_row;
96 	int n = (GlobalUtil::_texMaxDimGL + h - 1) / GlobalUtil::_texMaxDimGL;
97 	if ( n > 1) {_sift_num_stripe *= n; _sift_per_row *= n; }
98 
99 	//initialize
100 
101 	_texDes[0].InitTexture(_sift_per_row * _pixel_per_sift, __max_sift / _sift_per_row, 0,GL_RGBA8);
102 	_texDes[1].InitTexture(_sift_per_row * _pixel_per_sift, __max_sift / _sift_per_row, 0, GL_RGBA8);
103 	_texLoc[0].InitTexture(_sift_per_row , __max_sift / _sift_per_row, 0);
104 	_texLoc[1].InitTexture(_sift_per_row , __max_sift / _sift_per_row, 0);
105 
106 	if(GlobalUtil::_SupportNVFloat || GlobalUtil::_SupportTextureRG)
107 	{
108 		//use single-component texture to save memory
109 #ifndef GL_R32F
110 #define GL_R32F 0x822E
111 #endif
112 		GLuint format = GlobalUtil::_SupportNVFloat ? GL_FLOAT_R_NV : GL_R32F;
113 		_texDot.InitTexture(__max_sift, __max_sift, 0, format);
114 		_texMatch[0].InitTexture(16, __max_sift / 16, 0, format);
115 		_texMatch[1].InitTexture(16, __max_sift / 16, 0, format);
116 	}else
117 	{
118 		_texDot.InitTexture(__max_sift, __max_sift, 0);
119 		_texMatch[0].InitTexture(16, __max_sift / 16, 0);
120 		_texMatch[1].InitTexture(16, __max_sift / 16, 0);
121 	}
122 
123 }
InitSiftMatch()124 void SiftMatchGL::InitSiftMatch()
125 {
126 	if(_initialized) return;
127 	GlobalUtil::InitGLParam(0);
128 	if(GlobalUtil::_GoodOpenGL == 0) return;
129 	AllocateSiftMatch();
130 	LoadSiftMatchShadersGLSL();
131 	_initialized = 1;
132 }
133 
134 
SetDescriptors(int index,int num,const unsigned char * descriptors,int id)135 void SiftMatchGL::SetDescriptors(int index, int num, const unsigned char* descriptors, int id)
136 {
137 	if(_initialized == 0) return;
138 	if (index > 1) index = 1;
139 	if (index < 0) index = 0;
140 	_have_loc[index] = 0;
141 
142 	//the same feature is already set
143 	if(id !=-1 && id == _id_sift[index]) return ;
144 	_id_sift[index] = id;
145 
146 	if(num > __max_sift) num = __max_sift;
147 
148 	sift_buffer.resize(num * 128 /4);
149 	memcpy(&sift_buffer[0], descriptors, 128 * num);
150 	_num_sift[index] = num;
151 	int w = _sift_per_row * _pixel_per_sift;
152 	int h = (num + _sift_per_row  - 1)/ _sift_per_row;
153 	sift_buffer.resize(w * h * 4, 0);
154 	_texDes[index].SetImageSize(w , h);
155 	_texDes[index].BindTex();
156 	if(_sift_num_stripe == 1)
157 	{
158 		glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, w, h, GL_RGBA,  GL_UNSIGNED_BYTE, &sift_buffer[0]);
159 	}else
160 	{
161 		for(int i = 0; i < _sift_num_stripe; ++i)
162 		{
163 			int ws = _sift_per_stripe * _pixel_per_sift;
164 			int x = i * ws;
165 			int pos = i * ws * h * 4;
166 			glTexSubImage2D(GlobalUtil::_texTarget, 0, x, 0, ws, h, GL_RGBA, GL_UNSIGNED_BYTE, &sift_buffer[pos]);
167 		}
168 	}
169 	_texDes[index].UnbindTex();
170 
171 }
172 
SetFeautreLocation(int index,const float * locations,int gap)173 void SiftMatchGL::SetFeautreLocation(int index, const float* locations, int gap)
174 {
175 	if(_num_sift[index] <=0) return;
176 	int w = _sift_per_row ;
177 	int h = (_num_sift[index] + _sift_per_row  - 1)/ _sift_per_row;
178 	sift_buffer.resize(_num_sift[index] * 2);
179 	if(gap == 0)
180 	{
181 		memcpy(&sift_buffer[0], locations, _num_sift[index] * 2 * sizeof(float));
182 	}else
183 	{
184 		for(int i = 0; i < _num_sift[index]; ++i)
185 		{
186 			sift_buffer[i*2] = *locations++;
187 			sift_buffer[i*2+1]= *locations ++;
188 			locations += gap;
189 		}
190 	}
191 	sift_buffer.resize(w * h * 2, 0);
192 	_texLoc[index].SetImageSize(w , h);
193 	_texLoc[index].BindTex();
194 	if(_sift_num_stripe == 1)
195 	{
196 		glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, w, h, GL_LUMINANCE_ALPHA , GL_FLOAT , &sift_buffer[0]);
197 	}else
198 	{
199 		for(int i = 0; i < _sift_num_stripe; ++i)
200 		{
201 			int ws = _sift_per_stripe;
202 			int x = i * ws;
203 			int pos = i * ws * h * 2;
204 			glTexSubImage2D(GlobalUtil::_texTarget, 0, x, 0, ws, h, GL_LUMINANCE_ALPHA , GL_FLOAT, &sift_buffer[pos]);
205 		}
206 	}
207 	_texLoc[index].UnbindTex();
208 	_have_loc[index] = 1;
209 }
210 
SetDescriptors(int index,int num,const float * descriptors,int id)211 void SiftMatchGL::SetDescriptors(int index, int num, const float* descriptors, int id)
212 {
213 	if(_initialized == 0) return;
214 	if (index > 1) index = 1;
215 	if (index < 0) index = 0;
216 	_have_loc[index] = 0;
217 
218 	//the same feature is already set
219 	if(id !=-1 && id == _id_sift[index]) return ;
220 	_id_sift[index] = id;
221 
222 	if(num > __max_sift) num = __max_sift;
223 
224 	sift_buffer.resize(num * 128 /4);
225 	unsigned char * pub = (unsigned char*) &sift_buffer[0];
226 	for(int i = 0; i < 128 * num; ++i)
227 	{
228 		pub[i] = int(512 * descriptors[i] + 0.5);
229 	}
230 	_num_sift[index] = num;
231 	int w = _sift_per_row * _pixel_per_sift;
232 	int h = (num + _sift_per_row  - 1)/ _sift_per_row;
233 	sift_buffer.resize(w * h * 4, 0);
234 	_texDes[index].SetImageSize(w, h);
235 	_texDes[index].BindTex();
236 	if(_sift_num_stripe == 1)
237 	{
238 		glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, w, h, GL_RGBA,  GL_UNSIGNED_BYTE, &sift_buffer[0]);
239 	}else
240 	{
241 		for(int i = 0; i < _sift_num_stripe; ++i)
242 		{
243 			int ws = _sift_per_stripe * _pixel_per_sift;
244 			int x = i * ws;
245 			int pos = i * ws * h * 4;
246 			glTexSubImage2D(GlobalUtil::_texTarget, 0, x, 0, ws, h, GL_RGBA, GL_UNSIGNED_BYTE, &sift_buffer[pos]);
247 		}
248 	}
249 	_texDes[index].UnbindTex();
250 }
251 
252 
LoadSiftMatchShadersGLSL()253 void SiftMatchGL::LoadSiftMatchShadersGLSL()
254 {
255 	ProgramGLSL * program;
256 	ostringstream out;
257 	if(GlobalUtil::_IsNvidia)
258 	out <<  "#pragma optionNV(ifcvt none)\n"
259 			"#pragma optionNV(unroll all)\n";
260 
261     out <<  "#define SIFT_PER_STRIPE " << _sift_per_stripe << ".0\n"
262 			"#define PIXEL_PER_SIFT " << _pixel_per_sift << "\n"
263 			"uniform sampler2DRect tex1, tex2; uniform vec2	size;\n"
264 			"void main()		\n"
265 		    "{\n"
266 		<<	"   vec4 val = vec4(0.0, 0.0, 0.0, 0.0), data1, buf;\n"
267 			"   vec2 index = gl_FragCoord.yx; \n"
268 			"   vec2 stripe_size = size.xy * SIFT_PER_STRIPE;\n"
269 			"	vec2 temp_div1 = index / stripe_size;\n"
270 			"   vec2 stripe_index = floor(temp_div1);\n"
271 			"   index = floor(stripe_size * (temp_div1 - stripe_index));\n"
272 			"	vec2 temp_div2 = index * vec2(1.0 / float(SIFT_PER_STRIPE));\n"
273 			"	vec2 temp_floor2 = floor(temp_div2);\n"
274 			"   vec2 index_v = temp_floor2 + vec2(0.5);\n "
275 			"   vec2 index_h = vec2(SIFT_PER_STRIPE)* (temp_div2 - temp_floor2);\n"
276 			"   vec2 tx = (index_h + stripe_index * vec2(SIFT_PER_STRIPE))* vec2(PIXEL_PER_SIFT) + 0.5;\n"
277 			"   vec2 tpos1, tpos2; \n"
278 			"	vec4 tpos = vec4(tx, index_v);\n"
279 			//////////////////////////////////////////////////////
280 			"   for(int i = 0; i < PIXEL_PER_SIFT; ++i){\n"
281 			"		buf = texture2DRect(tex2, tpos.yw);\n"
282 			"		data1 = texture2DRect(tex1, tpos.xz);\n"
283 			"		val += (data1 * buf);\n"
284 			"		tpos.xy = tpos.xy + vec2(1.0, 1.0);\n"
285 			"	}\n"
286 			"	const float factor = 0.248050689697265625; \n"
287 			"	gl_FragColor =vec4(dot(val, vec4(factor)), index,  0);\n"
288 			"}"
289 		<<	'\0';
290 
291 	s_multiply = program= new ProgramGLSL(out.str().c_str());
292 
293 	_param_multiply_tex1 = glGetUniformLocation(*program, "tex1");
294 	_param_multiply_tex2 = glGetUniformLocation(*program, "tex2");
295 	_param_multiply_size = glGetUniformLocation(*program, "size");
296 
297 	out.seekp(ios::beg);
298     if(GlobalUtil::_IsNvidia)
299     out <<  "#pragma optionNV(ifcvt none)\n"
300 			"#pragma optionNV(unroll all)\n";
301 
302     out <<  "#define SIFT_PER_STRIPE " << _sift_per_stripe << ".0\n"
303 			"#define PIXEL_PER_SIFT " << _pixel_per_sift << "\n"
304 			"uniform sampler2DRect tex1, tex2;\n"
305 			"uniform sampler2DRect texL1;\n"
306 			"uniform sampler2DRect texL2; \n"
307 			"uniform mat3 H; \n"
308 			"uniform mat3 F; \n"
309 			"uniform vec4	size; \n"
310 			"void main()		\n"
311 		    "{\n"
312 		<<	"   vec4 val = vec4(0.0, 0.0, 0.0, 0.0), data1, buf;\n"
313 			"   vec2 index = gl_FragCoord.yx; \n"
314 			"   vec2 stripe_size = size.xy * SIFT_PER_STRIPE;\n"
315 			"	vec2 temp_div1 = index / stripe_size;\n"
316 			"   vec2 stripe_index = floor(temp_div1);\n"
317 			"   index = floor(stripe_size * (temp_div1 - stripe_index));\n"
318 			"	vec2 temp_div2 = index  * vec2(1.0/ float(SIFT_PER_STRIPE));\n"
319 			"	vec2 temp_floor2 = floor(temp_div2);\n"
320 			"   vec2 index_v = temp_floor2 + vec2(0.5);\n "
321 			"   vec2 index_h = vec2(SIFT_PER_STRIPE)* (temp_div2 - temp_floor2);\n"
322 
323 			//read feature location data
324 			"   vec4 tlpos = vec4((index_h + stripe_index * vec2(SIFT_PER_STRIPE)) + 0.5, index_v);\n"
325 			"   vec3 loc1 = vec3(texture2DRect(texL1, tlpos.xz).xw, 1.0);\n"
326 			"   vec3 loc2 = vec3(texture2DRect(texL2, tlpos.yw).xw, 1.0);\n"
327 
328 			//check the guiding homography
329 			"   vec3 hxloc1 = H* loc1;\n"
330 			"   vec2 diff = loc2.xy- (hxloc1.xy/hxloc1.z);\n"
331 			"   float disth = diff.x * diff.x + diff.y * diff.y;\n"
332 			"   if(disth > size.z ) {gl_FragColor = vec4(0.0, index, 0.0); return;}\n"
333 
334 			//check the guiding fundamental
335 			"   vec3 fx1 = (F * loc1), ftx2 = (loc2 * F);\n"
336 			"   float x2tfx1 = dot(loc2, fx1);\n"
337 			"   vec4 temp = vec4(fx1.xy, ftx2.xy); \n"
338 			"   float sampson_error = (x2tfx1 * x2tfx1) / dot(temp, temp);\n"
339 			"   if(sampson_error > size.w) {gl_FragColor = vec4(0.0, index, 0.0); return;}\n"
340 
341 			//compare feature descriptor
342 			"   vec2 tx = (index_h + stripe_index * SIFT_PER_STRIPE)* vec2(PIXEL_PER_SIFT) + 0.5;\n"
343 			"   vec2 tpos1, tpos2; \n"
344 			"	vec4 tpos = vec4(tx, index_v);\n"
345 			"   for(int i = 0; i < PIXEL_PER_SIFT; ++i){\n"
346 			"		buf = texture2DRect(tex2, tpos.yw);\n"
347 			"		data1 = texture2DRect(tex1, tpos.xz);\n"
348 			"		val += data1 * buf;\n"
349 			"		tpos.xy = tpos.xy + vec2(1.0, 1.0);\n"
350 			"	}\n"
351 			"	const float factor = 0.248050689697265625; \n"
352 			"	gl_FragColor =vec4(dot(val, vec4(factor)), index,  0.0);\n"
353 			"}"
354 		<<	'\0';
355 
356 	s_guided_mult = program= new ProgramGLSL(out.str().c_str());
357 
358 	_param_guided_mult_tex1 = glGetUniformLocation(*program, "tex1");
359 	_param_guided_mult_tex2= glGetUniformLocation(*program, "tex2");
360 	_param_guided_mult_texl1 = glGetUniformLocation(*program, "texL1");
361 	_param_guided_mult_texl2 = glGetUniformLocation(*program, "texL2");
362 	_param_guided_mult_h = glGetUniformLocation(*program, "H");
363 	_param_guided_mult_f = glGetUniformLocation(*program, "F");
364 	_param_guided_mult_param = glGetUniformLocation(*program, "size");
365 
366 	//row max
367 	out.seekp(ios::beg);
368 	out <<	"#define BLOCK_WIDTH 16.0\n"
369 			"uniform sampler2DRect tex;	uniform vec3 param;\n"
370 			"void main ()\n"
371 			"{\n"
372 			"	float index = gl_FragCoord.x + floor(gl_FragCoord.y) * BLOCK_WIDTH; \n"
373 			"	vec2 bestv = vec2(-1.0); float imax = -1.0;\n"
374 			"	for(float i = 0.0; i < param.x; i ++){\n "
375 			"		float v = texture2DRect(tex, vec2(i + 0.5, index)).r; \n"
376 			"		imax = v > bestv.r ? i : imax; \n "
377 			"		bestv  = v > bestv.r? vec2(v, bestv.r) : max(bestv, vec2(v));\n "
378 			"	}\n"
379 			"	bestv = acos(min(bestv, 1.0));\n"
380 			"	if(bestv.x >= param.y || bestv.x >= param.z * bestv.y) imax = -1.0;\n"
381 			"	gl_FragColor = vec4(imax, bestv, index);\n"
382 			"}"
383 		<<  '\0';
384 	s_row_max = program= new ProgramGLSL(out.str().c_str());
385 	_param_rowmax_param = glGetUniformLocation(*program, "param");
386 
387 	out.seekp(ios::beg);
388 	out <<	"#define BLOCK_WIDTH 16.0\n"
389 			"uniform sampler2DRect tex; uniform vec3 param;\n"
390 			"void main ()\n"
391 			"{\n"
392 			"	float index = gl_FragCoord.x + floor(gl_FragCoord.y) * BLOCK_WIDTH; \n"
393 			"	vec2 bestv = vec2(-1.0); float imax = -1.0;\n"
394 			"	for(float i = 0.0; i < param.x; i ++){\n "
395 			"		float v = texture2DRect(tex, vec2(index, i + 0.5)).r; \n"
396 			"		imax = (v > bestv.r)? i : imax; \n "
397 			"		bestv  = v > bestv.r? vec2(v, bestv.r) : max(bestv, vec2(v));\n "
398 			"	}\n"
399 			"	bestv = acos(min(bestv, 1.0));\n"
400 			"	if(bestv.x >= param.y || bestv.x >= param.z * bestv.y) imax = -1.0;\n"
401 			"	gl_FragColor = vec4(imax, bestv, index);\n"
402 			"}"
403 		<<  '\0';
404 	s_col_max = program =new ProgramGLSL(out.str().c_str());
405 	_param_colmax_param = glGetUniformLocation(*program, "param");
406 
407 
408 }
409 
GetGuidedSiftMatch(int max_match,uint32_t match_buffer[][2],float * H,float * F,float distmax,float ratiomax,float hdistmax,float fdistmax,int mbm)410 int  SiftMatchGL::GetGuidedSiftMatch(int max_match, uint32_t match_buffer[][2], float* H, float* F,
411 									 float distmax, float ratiomax, float hdistmax, float fdistmax, int mbm)
412 {
413 
414 	int dw = _num_sift[1];
415 	int dh = _num_sift[0];
416 	if(_initialized ==0) return 0;
417 	if(dw <= 0 || dh <=0) return 0;
418 	if(_have_loc[0] == 0 || _have_loc[1] == 0) return 0;
419 
420 	FrameBufferObject fbo;
421 	glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
422 	_texDot.SetImageSize(dw, dh);
423 
424 
425 	//data
426 	_texDot.AttachToFBO(0);
427 	_texDot.FitTexViewPort();
428 	glActiveTexture(GL_TEXTURE0);
429 	_texDes[0].BindTex();
430 	glActiveTexture(GL_TEXTURE1);
431 	_texDes[1].BindTex();
432 	glActiveTexture(GL_TEXTURE2);
433 	_texLoc[0].BindTex();
434 	glActiveTexture(GL_TEXTURE3);
435 	_texLoc[1].BindTex();
436 
437 	//multiply the descriptor matrices
438 	s_guided_mult->UseProgram();
439 
440 
441 	//set parameters glsl
442 	float dot_param[4] = {(float)_texDes[0].GetDrawHeight(), (float) _texDes[1].GetDrawHeight(), hdistmax, fdistmax};
443 	glUniform1i(_param_guided_mult_tex1, 0);
444 	glUniform1i(_param_guided_mult_tex2, 1);
445 	glUniform1i(_param_guided_mult_texl1, 2);
446 	glUniform1i(_param_guided_mult_texl2, 3);
447 	glUniformMatrix3fv(_param_guided_mult_h, 1, GL_TRUE, H);
448 	glUniformMatrix3fv(_param_guided_mult_f, 1, GL_TRUE, F);
449 	glUniform4fv(_param_guided_mult_param, 1, dot_param);
450 
451 	_texDot.DrawQuad();
452 
453 	GLTexImage::UnbindMultiTex(4);
454 
455 	return GetBestMatch(max_match, match_buffer, distmax, ratiomax, mbm);
456 }
457 
GetBestMatch(int max_match,uint32_t match_buffer[][2],float distmax,float ratiomax,int mbm)458 int SiftMatchGL::GetBestMatch(int max_match, uint32_t match_buffer[][2], float distmax, float ratiomax, int mbm)
459 {
460 
461 	glActiveTexture(GL_TEXTURE0);
462 	_texDot.BindTex();
463 
464 	//readback buffer
465 	sift_buffer.resize(_num_sift[0] + _num_sift[1] + 16);
466 	float * buffer1 = &sift_buffer[0], * buffer2 = &sift_buffer[_num_sift[0]];
467 
468 	//row max
469 	_texMatch[0].AttachToFBO(0);
470 	_texMatch[0].SetImageSize(16, ( _num_sift[0] + 15) / 16);
471 	_texMatch[0].FitTexViewPort();
472 
473 	///set parameter glsl
474 	s_row_max->UseProgram();
475 	glUniform3f(_param_rowmax_param, (float)_num_sift[1], distmax, ratiomax);
476 
477 	_texMatch[0].DrawQuad();
478 	glReadPixels(0, 0, 16, (_num_sift[0] + 15)/16, GL_RED, GL_FLOAT, buffer1);
479 
480 	//col max
481 	if(mbm)
482 	{
483 		_texMatch[1].AttachToFBO(0);
484 		_texMatch[1].SetImageSize(16, (_num_sift[1] + 15) / 16);
485 		_texMatch[1].FitTexViewPort();
486 		//set parameter glsl
487 		s_col_max->UseProgram();
488 		glUniform3f(_param_rowmax_param, (float)_num_sift[0], distmax, ratiomax);
489 		_texMatch[1].DrawQuad();
490 		glReadPixels(0, 0, 16, (_num_sift[1] + 15) / 16, GL_RED, GL_FLOAT, buffer2);
491 	}
492 
493 
494 	//unload
495 	glUseProgram(0);
496 
497 	GLTexImage::UnbindMultiTex(2);
498 	GlobalUtil::CleanupOpenGL();
499 
500 	//write back the matches
501 	int nmatch = 0, j ;
502 	for(int i = 0; i < _num_sift[0] && nmatch < max_match; ++i)
503 	{
504 		j = int(buffer1[i]);
505 		if( j>= 0 && (!mbm ||int(buffer2[j]) == i))
506 		{
507 			match_buffer[nmatch][0] = i;
508 			match_buffer[nmatch][1] = j;
509 			nmatch++;
510 		}
511 	}
512 
513   const GLenum error_code(glGetError());
514   if (error_code != GL_NO_ERROR) {
515     return -1;
516   }
517 
518 	return nmatch;
519 }
520 
GetSiftMatch(int max_match,uint32_t match_buffer[][2],float distmax,float ratiomax,int mbm)521 int  SiftMatchGL::GetSiftMatch(int max_match, uint32_t match_buffer[][2], float distmax, float ratiomax, int mbm)
522 {
523 	int dw = _num_sift[1];
524 	int dh =  _num_sift[0];
525 	if(_initialized ==0) return 0;
526 	if(dw <= 0 || dh <=0) return 0;
527 
528 	FrameBufferObject fbo;
529 	glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
530 	_texDot.SetImageSize(dw, dh);
531 
532 	//data
533 	_texDot.AttachToFBO(0);
534 	_texDot.FitTexViewPort();
535 	glActiveTexture(GL_TEXTURE0);
536 	_texDes[0].BindTex();
537 	glActiveTexture(GL_TEXTURE1);
538 	_texDes[1].BindTex();
539 
540 	//////////////////
541 	//multiply the descriptor matrices
542 	s_multiply->UseProgram();
543 	//set parameters
544 	float heights[2] = {(float)_texDes[0].GetDrawHeight(), (float)_texDes[1].GetDrawHeight()};
545 
546 	glUniform1i(_param_multiply_tex1, 0);
547 	glUniform1i(_param_multiply_tex2 , 1);
548 	glUniform2fv(_param_multiply_size, 1, heights);
549 
550 	_texDot.DrawQuad();
551 
552 	glActiveTexture(GL_TEXTURE1);
553 	glBindTexture(GlobalUtil::_texTarget, 0);
554 
555 	return GetBestMatch(max_match, match_buffer, distmax, ratiomax, mbm);
556 }
557 
558 
_CreateContextGL()559 int SiftMatchGPU::_CreateContextGL()
560 {
561 	//Create an OpenGL Context?
562     if (__language >= SIFTMATCH_CUDA) {}
563 	else if(!GlobalUtil::CreateWindowEZ())
564 	{
565 #if CUDA_SIFTGPU_ENABLED
566 		__language = SIFTMATCH_CUDA;
567 #else
568 		return 0;
569 #endif
570 	}
571 	return VerifyContextGL();
572 }
573 
574 
_VerifyContextGL()575 int SiftMatchGPU::_VerifyContextGL()
576 {
577 	if(__matcher) return GlobalUtil::_GoodOpenGL;
578 
579 #ifdef CUDA_SIFTGPU_ENABLED
580 
581     if(__language >= SIFTMATCH_CUDA) {}
582     else if(__language == SIFTMATCH_SAME_AS_SIFTGPU && GlobalUtil::_UseCUDA){}
583     else  GlobalUtil::InitGLParam(0);
584     if(GlobalUtil::_GoodOpenGL == 0) __language = SIFTMATCH_CUDA;
585 
586     if(((__language == SIFTMATCH_SAME_AS_SIFTGPU && GlobalUtil::_UseCUDA) || __language >= SIFTMATCH_CUDA)
587         && SiftMatchCU::CheckCudaDevice (GlobalUtil::_DeviceIndex))
588     {
589 		__language = SIFTMATCH_CUDA;
590 		__matcher = ::new SiftMatchCU(__max_sift);
591 	}else
592 #else
593     if((__language == SIFTMATCH_SAME_AS_SIFTGPU && GlobalUtil::_UseCUDA) || __language >= SIFTMATCH_CUDA)
594     {
595 	    std::cerr	<< "---------------------------------------------------------------------------\n"
596 				    << "CUDA not supported in this binary! To enable it, please use SiftGPU_CUDA_Enable\n"
597 				    << "Project for VS2005+ or set siftgpu_enable_cuda to 1 in makefile\n"
598 				    << "----------------------------------------------------------------------------\n";
599     }
600 #endif
601 	{
602 		__language = SIFTMATCH_GLSL;
603 		__matcher = ::new SiftMatchGL(__max_sift, 1);
604 	}
605 
606 	if(GlobalUtil::_verbose)
607         std::cout   << "[SiftMatchGPU]: " << (__language == SIFTMATCH_CUDA? "CUDA" : "GLSL") <<"\n\n";
608 
609 	__matcher->InitSiftMatch();
610 	return GlobalUtil::_GoodOpenGL;
611 }
612 
operator new(size_t size)613 void* SiftMatchGPU::operator new (size_t  size){
614   void * p = malloc(size);
615   if (p == 0)
616   {
617 	  const std::bad_alloc ba;
618 	  throw ba;
619   }
620   return p;
621 }
622 
623 
SiftMatchGPU(int max_sift)624 SiftMatchGPU::SiftMatchGPU(int max_sift)
625 {
626 	__max_sift = max(max_sift, 1024);
627 	__language = 0;
628 	__matcher = NULL;
629 }
630 
SetLanguage(int language)631 void SiftMatchGPU::SetLanguage(int language)
632 {
633 	if(__matcher) return;
634     ////////////////////////
635 #ifdef CUDA_SIFTGPU_ENABLED
636 	if(language >= SIFTMATCH_CUDA) GlobalUtil::_DeviceIndex = language - SIFTMATCH_CUDA;
637 #endif
638     __language = language > SIFTMATCH_CUDA ? SIFTMATCH_CUDA : language;
639 }
640 
SetDeviceParam(int argc,char ** argv)641 void SiftMatchGPU::SetDeviceParam(int argc, char**argv)
642 {
643     if(__matcher) return;
644     GlobalUtil::SetDeviceParam(argc, argv);
645 }
646 
Allocate(int max_sift,int mbm)647 bool SiftMatchGPU::Allocate(int max_sift, int mbm) {
648   if(__matcher) {
649     const bool success = __matcher->Allocate(max_sift, mbm);
650     __max_sift = __matcher->__max_sift;
651     return success;
652   }
653 
654   return false;
655 }
656 
SetMaxSift(int max_sift)657 void SiftMatchGPU::SetMaxSift(int max_sift)
658 {
659 	if(__matcher)	{
660     __matcher->SetMaxSift(max(128, max_sift));
661     __max_sift = __matcher->__max_sift;
662   } else {
663     __max_sift = max(128, max_sift);
664   }
665 }
666 
~SiftMatchGPU()667 SiftMatchGPU::~SiftMatchGPU()
668 {
669 	if(__matcher) delete __matcher;
670 }
671 
SetDescriptors(int index,int num,const unsigned char * descriptors,int id)672 void SiftMatchGPU::SetDescriptors(int index, int num, const unsigned char* descriptors, int id)
673 {
674 	__matcher->SetDescriptors(index, num,  descriptors, id);
675 }
676 
SetDescriptors(int index,int num,const float * descriptors,int id)677 void SiftMatchGPU::SetDescriptors(int index, int num, const float* descriptors, int id)
678 {
679 	__matcher->SetDescriptors(index, num, descriptors, id);
680 }
681 
SetFeautreLocation(int index,const float * locations,int gap)682 void SiftMatchGPU::SetFeautreLocation(int index, const float* locations, int gap)
683 {
684 	__matcher->SetFeautreLocation(index, locations, gap);
685 
686 }
GetGuidedSiftMatch(int max_match,uint32_t match_buffer[][2],float * H,float * F,float distmax,float ratiomax,float hdistmax,float fdistmax,int mutual_best_match)687 int  SiftMatchGPU::GetGuidedSiftMatch(int max_match, uint32_t match_buffer[][2], float* H, float* F,
688 				float distmax, float ratiomax, float hdistmax, float fdistmax, int mutual_best_match)
689 {
690 	if(H == NULL && F == NULL)
691 	{
692 		return __matcher->GetSiftMatch(max_match, match_buffer, distmax, ratiomax, mutual_best_match);
693 	}else
694 	{
695 		float Z[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1}, ti = (1.0e+20F);
696 
697 		return __matcher->GetGuidedSiftMatch(max_match, match_buffer, H? H : Z, F? F : Z,
698 			distmax, ratiomax, H? hdistmax: ti,  F? fdistmax: ti, mutual_best_match);
699 	}
700 }
701 
GetSiftMatch(int max_match,uint32_t match_buffer[][2],float distmax,float ratiomax,int mutual_best_match)702 int  SiftMatchGPU::GetSiftMatch(int max_match, uint32_t match_buffer[][2], float distmax, float ratiomax, int mutual_best_match)
703 {
704 	return __matcher->GetSiftMatch(max_match, match_buffer, distmax, ratiomax, mutual_best_match);
705 }
706 
CreateNewSiftMatchGPU(int max_sift)707 SiftMatchGPU* CreateNewSiftMatchGPU(int max_sift)
708 {
709 	return new SiftMatchGPU(max_sift);
710 }
711 
712