1 // Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 //       notice, this list of conditions and the following disclaimer.
9 //
10 //     * Redistributions in binary form must reproduce the above copyright
11 //       notice, this list of conditions and the following disclaimer in the
12 //       documentation and/or other materials provided with the distribution.
13 //
14 //     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
15 //       its contributors may be used to endorse or promote products derived
16 //       from this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
22 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 // POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
31 
32 #ifndef COLMAP_SRC_FEATURE_SIFT_H_
33 #define COLMAP_SRC_FEATURE_SIFT_H_
34 
35 #include "estimators/two_view_geometry.h"
36 #include "feature/types.h"
37 #include "util/bitmap.h"
38 
// Forward declarations of the GPU extractor and matcher types. Within this
// header they are only referred to through pointers.
class SiftGPU;
class SiftMatchGPU;
41 
42 namespace colmap {
43 
struct SiftExtractionOptions {
  // Thread count used for feature extraction.
  int num_threads{-1};

  // Toggles GPU-based feature extraction.
  bool use_gpu{true};

  // GPU index used for feature extraction. Several GPUs can be selected for
  // multi-GPU extraction through a comma-separated list, e.g., "0,1,2,3".
  std::string gpu_index{"-1"};

  // Upper bound on the image size; images exceeding it are down-scaled.
  int max_image_size{3200};

  // Upper bound on the number of detected features. Larger-scale features
  // are the ones that are kept.
  int max_num_features{8192};

  // Pyramid octave to start from, i.e. -1 upsamples the image by one level.
  int first_octave{-1};

  // Octave count.
  int num_octaves{4};

  // Level count within each octave.
  int octave_resolution{3};

  // Detection peak threshold. The default is computed from the value that
  // octave_resolution holds at the time this member is initialized.
  double peak_threshold{0.02 / octave_resolution};

  // Detection edge threshold.
  double edge_threshold{10.0};

  // When enabled, the affine shape of SIFT features is estimated as oriented
  // ellipses, whereas original SIFT estimates oriented disks.
  bool estimate_affine_shape{false};

  // Upper bound on the orientations per keypoint; applies if
  // estimate_affine_shape is not set.
  int max_num_orientations{2};

  // Produces upright features by fixing the orientation to 0.
  bool upright{false};

  // Adapts the feature detection to the darkness of the image. This is only
  // available in the OpenGL SiftGPU version.
  bool darkness_adaptivity{false};

  // Parameters of domain-size pooling, which averages the SIFT descriptor
  // over multiple scales around the detected scale. The method was proposed
  // in "Domain-Size Pooling in Local Descriptors and Network Architectures",
  // J. Dong and S. Soatto, CVPR 2015, and was shown to outperform other SIFT
  // variants and learned descriptors in "Comparative Evaluation of
  // Hand-Crafted and Learned Local Features", Schönberger, Hardmeier,
  // Sattler, Pollefeys, CVPR 2016.
  bool domain_size_pooling{false};
  double dsp_min_scale{1.0 / 6.0};
  double dsp_max_scale{3.0};
  int dsp_num_scales{10};

  enum class Normalization {
    // Each descriptor is L1-normalized and then square-rooted element-wise.
    // Usually better than standard L2-normalization, see "Three things
    // everyone should know to improve object retrieval", Relja Arandjelovic
    // and Andrew Zisserman, CVPR 2012.
    L1_ROOT,
    // L2-normalization of each vector.
    L2,
  };
  Normalization normalization{Normalization::L1_ROOT};

  bool Check() const;
};
115 
struct SiftMatchingOptions {
  // Thread count used for feature matching and geometric verification.
  int num_threads{-1};

  // Toggles GPU-based feature matching.
  bool use_gpu{true};

  // GPU index used for feature matching. Several GPUs can be selected for
  // multi-GPU matching through a comma-separated list, e.g., "0,1,2,3".
  std::string gpu_index{"-1"};

  // Upper bound on the distance ratio between the best and the second best
  // match.
  double max_ratio{0.8};

  // Upper bound on the distance to the best match.
  double max_distance{0.7};

  // Toggles cross checking during matching.
  bool cross_check{true};

  // Upper bound on the number of matches.
  int max_num_matches{32768};

  // Upper bound on the epipolar error, in pixels, used by geometric
  // verification.
  double max_error{4.0};

  // Confidence threshold used by geometric verification.
  double confidence{0.999};

  // Lower and upper bound on the number of RANSAC iterations. These bounds
  // overrule the min_inlier_ratio option.
  int min_num_trials{100};
  int max_num_trials{10000};

  // Minimum inlier ratio assumed a priori; it determines the maximum number
  // of iterations.
  double min_inlier_ratio{0.25};

  // Number of inliers an image pair needs at least in order to be considered
  // geometrically verified.
  int min_num_inliers{15};

  // Toggles the attempt to estimate multiple geometric models per image pair.
  bool multiple_models{false};

  // Toggles guided matching, which is performed if geometric verification
  // succeeds.
  bool guided_matching{false};

  bool Check() const;
};
166 
// Extract SIFT features for the given image on the CPU. Descriptors are only
// extracted if the `descriptors` output pointer is not NULL.
//
// `options` configures the extraction, `bitmap` is the input image, and
// `keypoints` / `descriptors` receive the results.
// NOTE(review): the meaning of the returned bool is not visible in this
// header; presumably it signals success -- confirm against the definition.
bool ExtractSiftFeaturesCPU(const SiftExtractionOptions& options,
                            const Bitmap& bitmap, FeatureKeypoints* keypoints,
                            FeatureDescriptors* descriptors);
// Variant of `ExtractSiftFeaturesCPU` with the same parameter list.
// NOTE(review): judging by the name, this presumably performs the covariant
// extraction related to `estimate_affine_shape` and `domain_size_pooling` in
// `SiftExtractionOptions` -- confirm against the definition, including
// whether `descriptors` may be NULL here as well and what the returned bool
// means.
bool ExtractCovariantSiftFeaturesCPU(const SiftExtractionOptions& options,
                                     const Bitmap& bitmap,
                                     FeatureKeypoints* keypoints,
                                     FeatureDescriptors* descriptors);
176 
// Create a SiftGPU feature extractor. The same SiftGPU instance can be used to
// extract features for multiple images. Note that an OpenGL context must be
// made current in the thread of the caller. If the gpu_index is not -1, the
// CUDA version of SiftGPU is used, which produces slightly different results
// than the OpenGL implementation.
//
// `sift_gpu` is the instance that gets initialized according to `options`.
// NOTE(review): presumably returns false if the initialization fails --
// confirm against the definition.
bool CreateSiftGPUExtractor(const SiftExtractionOptions& options,
                            SiftGPU* sift_gpu);
184 
// Extract SIFT features for the given image on the GPU. The `sift_gpu`
// instance must already have been initialized using `CreateSiftGPUExtractor`.
//
// `keypoints` / `descriptors` receive the results.
// NOTE(review): unlike for `ExtractSiftFeaturesCPU`, nothing here states
// whether `descriptors` may be NULL, nor what the returned bool means --
// confirm against the definition.
bool ExtractSiftFeaturesGPU(const SiftExtractionOptions& options,
                            const Bitmap& bitmap, SiftGPU* sift_gpu,
                            FeatureKeypoints* keypoints,
                            FeatureDescriptors* descriptors);
191 
// Load keypoints and descriptors from a text file in the following format:
//
//    LINE_0:            NUM_FEATURES DIM
//    LINE_1:            X Y SCALE ORIENTATION D_1 D_2 D_3 ... D_DIM
//    LINE_I:            ...
//    LINE_NUM_FEATURES: X Y SCALE ORIENTATION D_1 D_2 D_3 ... D_DIM
//
// The first line specifies the number of features and the descriptor
// dimensionality. It is followed by one line per feature, where X, Y, SCALE,
// and ORIENTATION are of type float and D_1 ... D_DIM are the descriptor
// elements in the range [0, 255].
//
// For example:
//
//    2 4
//    0.32 0.12 1.23 1.0 1 2 3 4
//    0.32 0.12 1.23 1.0 1 2 3 4
//
// `path` is the location of the text file; `keypoints` and `descriptors`
// receive the loaded data.
// NOTE(review): the function returns void, so how an unreadable or malformed
// file is reported is not visible in this header -- confirm against the
// definition.
void LoadSiftFeaturesFromTextFile(const std::string& path,
                                  FeatureKeypoints* keypoints,
                                  FeatureDescriptors* descriptors);
212 
// Match the given SIFT features on the CPU.
//
// `descriptors1` and `descriptors2` are the descriptors of the two feature
// sets to match, and `matches` receives the result.
// NOTE(review): judging by the name, this is presumably the exhaustive
// (brute-force) matching variant -- confirm against the definition.
void MatchSiftFeaturesCPUBruteForce(const SiftMatchingOptions& match_options,
                                    const FeatureDescriptors& descriptors1,
                                    const FeatureDescriptors& descriptors2,
                                    FeatureMatches* matches);
// Match the given SIFT features on the CPU. Takes the same parameters as
// `MatchSiftFeaturesCPUBruteForce`.
// NOTE(review): judging by the name, this presumably performs the matching
// with the FLANN nearest-neighbor library -- confirm against the definition.
void MatchSiftFeaturesCPUFLANN(const SiftMatchingOptions& match_options,
                               const FeatureDescriptors& descriptors1,
                               const FeatureDescriptors& descriptors2,
                               FeatureMatches* matches);
// Match the given SIFT features on the CPU. Takes the same parameters as
// `MatchSiftFeaturesCPUBruteForce` and `MatchSiftFeaturesCPUFLANN`.
// NOTE(review): how this function relates to those two variants is not
// visible in this header -- confirm against the definition.
void MatchSiftFeaturesCPU(const SiftMatchingOptions& match_options,
                          const FeatureDescriptors& descriptors1,
                          const FeatureDescriptors& descriptors2,
                          FeatureMatches* matches);
// Guided matching of SIFT features on the CPU. In addition to the
// descriptors, this takes the keypoints of both feature sets and a
// `two_view_geometry` that is passed by non-const pointer.
// NOTE(review): presumably the geometry in `two_view_geometry` guides the
// matching and the same object receives the resulting matches -- confirm
// against the definition.
void MatchGuidedSiftFeaturesCPU(const SiftMatchingOptions& match_options,
                                const FeatureKeypoints& keypoints1,
                                const FeatureKeypoints& keypoints2,
                                const FeatureDescriptors& descriptors1,
                                const FeatureDescriptors& descriptors2,
                                TwoViewGeometry* two_view_geometry);
232 
// Create a SiftGPU feature matcher. Note that if CUDA is not available or the
// gpu_index is -1, the OpenGLContextManager must be created in the main thread
// of the Qt application before calling this function. The same SiftMatchGPU
// instance can be used to match features between multiple image pairs.
//
// `sift_match_gpu` is the instance that gets set up according to
// `match_options`.
// NOTE(review): presumably returns false if the setup fails -- confirm
// against the definition.
bool CreateSiftGPUMatcher(const SiftMatchingOptions& match_options,
                          SiftMatchGPU* sift_match_gpu);
239 
// Match the given SIFT features on the GPU. If either of the descriptors is
// NULL, the keypoints/descriptors will not be uploaded and the previously
// uploaded descriptors will be reused for the matching.
//
// `matches` receives the result.
// NOTE(review): `sift_match_gpu` is presumably expected to have been set up
// with `CreateSiftGPUMatcher` beforehand -- confirm against the definition.
void MatchSiftFeaturesGPU(const SiftMatchingOptions& match_options,
                          const FeatureDescriptors* descriptors1,
                          const FeatureDescriptors* descriptors2,
                          SiftMatchGPU* sift_match_gpu,
                          FeatureMatches* matches);
// Guided matching of SIFT features on the GPU. In addition to the
// descriptors, this takes the keypoints of both feature sets and a
// `two_view_geometry` that is passed by non-const pointer.
// NOTE(review): presumably NULL keypoints/descriptors reuse previously
// uploaded data as for `MatchSiftFeaturesGPU`, the geometry in
// `two_view_geometry` guides the matching, and the same object receives the
// resulting matches -- confirm against the definition.
void MatchGuidedSiftFeaturesGPU(const SiftMatchingOptions& match_options,
                                const FeatureKeypoints* keypoints1,
                                const FeatureKeypoints* keypoints2,
                                const FeatureDescriptors* descriptors1,
                                const FeatureDescriptors* descriptors2,
                                SiftMatchGPU* sift_match_gpu,
                                TwoViewGeometry* two_view_geometry);
255 
256 }  // namespace colmap
257 
258 #endif  // COLMAP_SRC_FEATURE_SIFT_H_
259 
260