1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 #ifndef FLANN_AUTOTUNED_INDEX_H_
31 #define FLANN_AUTOTUNED_INDEX_H_
32 
33 #include "flann/general.h"
34 #include "flann/algorithms/nn_index.h"
35 #include "flann/nn/ground_truth.h"
36 #include "flann/nn/index_testing.h"
37 #include "flann/util/sampling.h"
38 #include "flann/algorithms/kdtree_index.h"
39 #include "flann/algorithms/kdtree_single_index.h"
40 #include "flann/algorithms/kmeans_index.h"
41 #include "flann/algorithms/composite_index.h"
42 #include "flann/algorithms/linear_index.h"
43 #include "flann/util/logger.h"
44 
45 
46 namespace flann
47 {
48 
49 template<typename Distance>
50 inline NNIndex<Distance>*
51   create_index_by_type(const flann_algorithm_t index_type,
52         const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance = Distance());
53 
54 
55 struct AutotunedIndexParams : public IndexParams
56 {
57     AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
58     {
59         (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
60         // precision desired (used for autotuning, -1 otherwise)
61         (*this)["target_precision"] = target_precision;
62         // build tree time weighting factor
63         (*this)["build_weight"] = build_weight;
64         // index memory weighting factor
65         (*this)["memory_weight"] = memory_weight;
66         // what fraction of the dataset to use for autotuning
67         (*this)["sample_fraction"] = sample_fraction;
68     }
69 };
70 
71 
72 template <typename Distance>
73 class AutotunedIndex : public NNIndex<Distance>
74 {
75 public:
76     typedef typename Distance::ElementType ElementType;
77     typedef typename Distance::ResultType DistanceType;
78 
79     typedef NNIndex<Distance> BaseClass;
80 
81     typedef AutotunedIndex<Distance> IndexType;
82 
83     typedef bool needs_kdtree_distance;
84 
85     AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
BaseClass(params,d)86         BaseClass(params, d), bestIndex_(NULL), speedup_(0), dataset_(inputData)
87     {
88         target_precision_ = get_param(params, "target_precision",0.8f);
89         build_weight_ =  get_param(params,"build_weight", 0.01f);
90         memory_weight_ = get_param(params, "memory_weight", 0.0f);
91         sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
92     }
93 
94     AutotunedIndex(const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
BaseClass(params,d)95         BaseClass(params, d), bestIndex_(NULL), speedup_(0)
96     {
97         target_precision_ = get_param(params, "target_precision",0.8f);
98         build_weight_ =  get_param(params,"build_weight", 0.01f);
99         memory_weight_ = get_param(params, "memory_weight", 0.0f);
100         sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
101     }
102 
AutotunedIndex(const AutotunedIndex & other)103     AutotunedIndex(const AutotunedIndex& other) : BaseClass(other),
104     		bestParams_(other.bestParams_),
105     		bestSearchParams_(other.bestSearchParams_),
106     		speedup_(other.speedup_),
107     		dataset_(other.dataset_),
108     		target_precision_(other.target_precision_),
109     		build_weight_(other.build_weight_),
110     		memory_weight_(other.memory_weight_),
111     		sample_fraction_(other.sample_fraction_)
112     {
113     		bestIndex_ = other.bestIndex_->clone();
114     }
115 
116     AutotunedIndex& operator=(AutotunedIndex other)
117     {
118     	this->swap(other);
119     	return * this;
120     }
121 
~AutotunedIndex()122     virtual ~AutotunedIndex()
123     {
124     	delete bestIndex_;
125     }
126 
clone()127     BaseClass* clone() const
128     {
129     	return new AutotunedIndex(*this);
130     }
131 
132     /**
133      *          Method responsible with building the index.
134      */
buildIndex()135     void buildIndex()
136     {
137         bestParams_ = estimateBuildParams();
138         Logger::info("----------------------------------------------------\n");
139         Logger::info("Autotuned parameters:\n");
140         if (Logger::getLevel()>=FLANN_LOG_INFO)
141         	print_params(bestParams_);
142         Logger::info("----------------------------------------------------\n");
143 
144         flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
145         bestIndex_ = create_index_by_type(index_type, dataset_, bestParams_, distance_);
146         bestIndex_->buildIndex();
147         speedup_ = estimateSearchParams(bestSearchParams_);
148         Logger::info("----------------------------------------------------\n");
149         Logger::info("Search parameters:\n");
150         if (Logger::getLevel()>=FLANN_LOG_INFO)
151         	print_params(bestSearchParams_);
152         Logger::info("----------------------------------------------------\n");
153         bestParams_["search_params"] = bestSearchParams_;
154         bestParams_["speedup"] = speedup_;
155     }
156 
buildIndex(const Matrix<ElementType> & dataset)157     void buildIndex(const Matrix<ElementType>& dataset)
158     {
159     	dataset_ = dataset;
160     	this->buildIndex();
161     }
162 
163 
164     void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2)
165     {
166         if (bestIndex_) {
167             bestIndex_->addPoints(points, rebuild_threshold);
168         }
169     }
170 
removePoint(size_t id)171     void removePoint(size_t id)
172     {
173         if (bestIndex_) {
174             bestIndex_->removePoint(id);
175         }
176     }
177 
178 
179     template<typename Archive>
serialize(Archive & ar)180     void serialize(Archive& ar)
181     {
182     	ar.setObject(this);
183 
184     	ar & *static_cast<NNIndex<Distance>*>(this);
185 
186     	ar & target_precision_;
187     	ar & build_weight_;
188     	ar & memory_weight_;
189     	ar & sample_fraction_;
190 
191     	flann_algorithm_t index_type;
192     	if (Archive::is_saving::value) {
193     		index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
194     	}
195     	ar & index_type;
196     	ar & bestSearchParams_.checks;
197 
198     	if (Archive::is_loading::value) {
199     		bestParams_["algorithm"] = index_type;
200 
201     		index_params_["algorithm"] = getType();
202             index_params_["target_precision_"] = target_precision_;
203             index_params_["build_weight_"] = build_weight_;
204             index_params_["memory_weight_"] = memory_weight_;
205             index_params_["sample_fraction_"] = sample_fraction_;
206     	}
207     }
208 
saveIndex(FILE * stream)209     void saveIndex(FILE* stream)
210     {
211         {
212             serialization::SaveArchive sa(stream);
213             sa & *this;
214         }
215 
216     	bestIndex_->saveIndex(stream);
217     }
218 
loadIndex(FILE * stream)219     void loadIndex(FILE* stream)
220     {
221         {
222             serialization::LoadArchive la(stream);
223             la & *this;
224         }
225 
226         IndexParams params;
227         flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
228         bestIndex_ = create_index_by_type<Distance>((flann_algorithm_t)index_type, dataset_, params, distance_);
229         bestIndex_->loadIndex(stream);
230     }
231 
knnSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,size_t knn,const SearchParams & params)232     int knnSearch(const Matrix<ElementType>& queries,
233             Matrix<size_t>& indices,
234             Matrix<DistanceType>& dists,
235             size_t knn,
236             const SearchParams& params) const
237     {
238         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
239             return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_);
240         }
241         else {
242             return bestIndex_->knnSearch(queries, indices, dists, knn, params);
243         }
244     }
245 
knnSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,size_t knn,const SearchParams & params)246     int knnSearch(const Matrix<ElementType>& queries,
247             std::vector< std::vector<size_t> >& indices,
248             std::vector<std::vector<DistanceType> >& dists,
249             size_t knn,
250             const SearchParams& params) const
251     {
252         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
253             return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_);
254         }
255         else {
256             return bestIndex_->knnSearch(queries, indices, dists, knn, params);
257         }
258 
259     }
260 
radiusSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,DistanceType radius,const SearchParams & params)261     int radiusSearch(const Matrix<ElementType>& queries,
262             Matrix<size_t>& indices,
263             Matrix<DistanceType>& dists,
264             DistanceType radius,
265             const SearchParams& params) const
266     {
267         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
268             return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_);
269         }
270         else {
271             return bestIndex_->radiusSearch(queries, indices, dists, radius, params);
272         }
273     }
274 
radiusSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,DistanceType radius,const SearchParams & params)275     int radiusSearch(const Matrix<ElementType>& queries,
276             std::vector< std::vector<size_t> >& indices,
277             std::vector<std::vector<DistanceType> >& dists,
278             DistanceType radius,
279             const SearchParams& params) const
280     {
281         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
282             return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_);
283         }
284         else {
285             return bestIndex_->radiusSearch(queries, indices, dists, radius, params);
286         }
287     }
288 
289 
290 
291     /**
292      *      Method that searches for nearest-neighbors
293      */
findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)294     void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const
295     {
296         // should not get here
297         assert(false);
298     }
299 
getParameters()300     IndexParams getParameters() const
301     {
302         return bestParams_;
303     }
304 
getSearchParameters()305     FLANN_DEPRECATED SearchParams getSearchParameters() const
306     {
307         return bestSearchParams_;
308     }
309 
getSpeedup()310     FLANN_DEPRECATED float getSpeedup() const
311     {
312         return speedup_;
313     }
314 
315 
316     /**
317      *      Number of features in this index.
318      */
size()319     size_t size() const
320     {
321         return bestIndex_->size();
322     }
323 
324     /**
325      *  The length of each vector in this index.
326      */
veclen()327     size_t veclen() const
328     {
329         return bestIndex_->veclen();
330     }
331 
332     /**
333      * The amount of memory (in bytes) this index uses.
334      */
usedMemory()335     int usedMemory() const
336     {
337         return bestIndex_->usedMemory();
338     }
339 
340     /**
341      * Algorithm name
342      */
getType()343     flann_algorithm_t getType() const
344     {
345         return FLANN_INDEX_AUTOTUNED;
346     }
347 
348 protected:
buildIndexImpl()349     void buildIndexImpl()
350     {
351         /* nothing to do here */
352     }
353 
freeIndex()354     void freeIndex()
355     {
356         /* nothing to do here */
357     }
358 
359 private:
360 
361     struct CostData
362     {
363         float searchTimeCost;
364         float buildTimeCost;
365         float memoryCost;
366         float totalCost;
367         IndexParams params;
368     };
369 
evaluate_kmeans(CostData & cost)370     void evaluate_kmeans(CostData& cost)
371     {
372         StartStopTimer t;
373         int checks;
374         const int nn = 1;
375 
376         Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
377                      get_param<int>(cost.params,"iterations"),
378                      get_param<int>(cost.params,"branching"));
379         KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
380         // measure index build time
381         t.start();
382         kmeans.buildIndex();
383         t.stop();
384         float buildTime = (float)t.value;
385 
386         // measure search time
387         float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
388 
389         float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
390         cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
391         cost.searchTimeCost = searchTime;
392         cost.buildTimeCost = buildTime;
393         Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
394     }
395 
396 
evaluate_kdtree(CostData & cost)397     void evaluate_kdtree(CostData& cost)
398     {
399         StartStopTimer t;
400         int checks;
401         const int nn = 1;
402 
403         Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
404         KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
405 
406         t.start();
407         kdtree.buildIndex();
408         t.stop();
409         float buildTime = (float)t.value;
410 
411         //measure search time
412         float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
413 
414         float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
415         cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
416         cost.searchTimeCost = searchTime;
417         cost.buildTimeCost = buildTime;
418         Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
419     }
420 
421 
422     //    struct KMeansSimpleDownhillFunctor {
423     //
424     //        Autotune& autotuner;
425     //        KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {};
426     //
427     //        float operator()(int* params) {
428     //
429     //            float maxFloat = numeric_limits<float>::max();
430     //
431     //            if (params[0]<2) return maxFloat;
432     //            if (params[1]<0) return maxFloat;
433     //
434     //            CostData c;
435     //            c.params["algorithm"] = KMEANS;
436     //            c.params["centers-init"] = CENTERS_RANDOM;
437     //            c.params["branching"] = params[0];
438     //            c.params["max-iterations"] = params[1];
439     //
440     //            autotuner.evaluate_kmeans(c);
441     //
442     //            return c.timeCost;
443     //
444     //        }
445     //    };
446     //
447     //    struct KDTreeSimpleDownhillFunctor {
448     //
449     //        Autotune& autotuner;
450     //        KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {};
451     //
452     //        float operator()(int* params) {
453     //            float maxFloat = numeric_limits<float>::max();
454     //
455     //            if (params[0]<1) return maxFloat;
456     //
457     //            CostData c;
458     //            c.params["algorithm"] = KDTREE;
459     //            c.params["trees"] = params[0];
460     //
461     //            autotuner.evaluate_kdtree(c);
462     //
463     //            return c.timeCost;
464     //
465     //        }
466     //    };
467 
468 
469 
optimizeKMeans(std::vector<CostData> & costs)470     void optimizeKMeans(std::vector<CostData>& costs)
471     {
472         Logger::info("KMEANS, Step 1: Exploring parameter space\n");
473 
474         // explore kmeans parameters space using combinations of the parameters below
475         int maxIterations[] = { 1, 5, 10, 15 };
476         int branchingFactors[] = { 16, 32, 64, 128, 256 };
477 
478         int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
479         costs.reserve(costs.size() + kmeansParamSpaceSize);
480 
481         // evaluate kmeans for all parameter combinations
482         for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
483             for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
484                 CostData cost;
485                 cost.params["algorithm"] = FLANN_INDEX_KMEANS;
486                 cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
487                 cost.params["iterations"] = maxIterations[i];
488                 cost.params["branching"] = branchingFactors[j];
489 
490                 evaluate_kmeans(cost);
491                 costs.push_back(cost);
492             }
493         }
494 
495         //         Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
496         //
497         //         const int n = 2;
498         //         // choose initial simplex points as the best parameters so far
499         //         int kmeansNMPoints[n*(n+1)];
500         //         float kmeansVals[n+1];
501         //         for (int i=0;i<n+1;++i) {
502         //             kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
503         //             kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
504         //             kmeansVals[i] = kmeansCosts[i].timeCost;
505         //         }
506         //         KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
507         //         // run optimization
508         //         optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
509         //         // store results
510         //         for (int i=0;i<n+1;++i) {
511         //             kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
512         //             kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
513         //             kmeansCosts[i].timeCost = kmeansVals[i];
514         //         }
515     }
516 
517 
optimizeKDTree(std::vector<CostData> & costs)518     void optimizeKDTree(std::vector<CostData>& costs)
519     {
520         Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
521 
522         // explore kd-tree parameters space using the parameters below
523         int testTrees[] = { 1, 4, 8, 16, 32 };
524 
525         // evaluate kdtree for all parameter combinations
526         for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
527             CostData cost;
528             cost.params["algorithm"] = FLANN_INDEX_KDTREE;
529             cost.params["trees"] = testTrees[i];
530 
531             evaluate_kdtree(cost);
532             costs.push_back(cost);
533         }
534 
535         //         Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
536         //
537         //         const int n = 1;
538         //         // choose initial simplex points as the best parameters so far
539         //         int kdtreeNMPoints[n*(n+1)];
540         //         float kdtreeVals[n+1];
541         //         for (int i=0;i<n+1;++i) {
542         //             kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
543         //             kdtreeVals[i] = kdtreeCosts[i].timeCost;
544         //         }
545         //         KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
546         //         // run optimization
547         //         optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
548         //         // store results
549         //         for (int i=0;i<n+1;++i) {
550         //             kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
551         //             kdtreeCosts[i].timeCost = kdtreeVals[i];
552         //         }
553     }
554 
555     /**
556      *  Chooses the best nearest-neighbor algorithm and estimates the optimal
557      *  parameters to use when building the index (for a given precision).
558      *  Returns a dictionary with the optimal parameters.
559      */
estimateBuildParams()560     IndexParams estimateBuildParams()
561     {
562         std::vector<CostData> costs;
563 
564         int sampleSize = int(sample_fraction_ * dataset_.rows);
565         int testSampleSize = std::min(sampleSize / 10, 1000);
566 
567         Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
568 
569         // For a very small dataset, it makes no sense to build any fancy index, just
570         // use linear search
571         if (testSampleSize < 10) {
572             Logger::info("Choosing linear, dataset too small\n");
573             return LinearIndexParams();
574         }
575 
576         // We use a fraction of the original dataset to speedup the autotune algorithm
577         sampledDataset_ = random_sample(dataset_, sampleSize);
578         // We use a cross-validation approach, first we sample a testset from the dataset
579         testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
580 
581         // We compute the ground truth using linear search
582         Logger::info("Computing ground truth... \n");
583         gt_matches_ = Matrix<size_t>(new size_t[testDataset_.rows], testDataset_.rows, 1);
584         StartStopTimer t;
585         int repeats = 0;
586         t.reset();
587         while (t.value<0.2) {
588         	repeats++;
589             t.start();
590         	compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
591             t.stop();
592         }
593 
594         CostData linear_cost;
595         linear_cost.searchTimeCost = (float)t.value/repeats;
596         linear_cost.buildTimeCost = 0;
597         linear_cost.memoryCost = 0;
598         linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
599 
600         costs.push_back(linear_cost);
601 
602         // Start parameter autotune process
603         Logger::info("Autotuning parameters...\n");
604 
605         optimizeKMeans(costs);
606         optimizeKDTree(costs);
607 
608         float bestTimeCost = costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost;
609         for (size_t i = 0; i < costs.size(); ++i) {
610             float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
611             Logger::debug("Time cost: %g\n", timeCost);
612             if (timeCost < bestTimeCost) {
613                 bestTimeCost = timeCost;
614             }
615         }
616         Logger::debug("Best time cost: %g\n", bestTimeCost);
617 
618     	IndexParams bestParams = costs[0].params;
619         if (bestTimeCost > 0) {
620         	float bestCost = (costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost) / bestTimeCost;
621         	for (size_t i = 0; i < costs.size(); ++i) {
622         		float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
623         				memory_weight_ * costs[i].memoryCost;
624         		Logger::debug("Cost: %g\n", crtCost);
625         		if (crtCost < bestCost) {
626         			bestCost = crtCost;
627         			bestParams = costs[i].params;
628         		}
629         	}
630             Logger::debug("Best cost: %g\n", bestCost);
631         }
632 
633         delete[] gt_matches_.ptr();
634         delete[] testDataset_.ptr();
635         delete[] sampledDataset_.ptr();
636 
637         return bestParams;
638     }
639 
640 
641 
642     /**
643      *  Estimates the search time parameters needed to get the desired precision.
644      *  Precondition: the index is built
645      *  Postcondition: the searchParams will have the optimum params set, also the speedup obtained over linear search.
646      */
estimateSearchParams(SearchParams & searchParams)647     float estimateSearchParams(SearchParams& searchParams)
648     {
649         const int nn = 1;
650         const size_t SAMPLE_COUNT = 1000;
651 
652         assert(bestIndex_ != NULL); // must have a valid index
653 
654         float speedup = 0;
655 
656         int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
657         if (samples > 0) {
658             Matrix<ElementType> testDataset = random_sample(dataset_, samples);
659 
660             Logger::info("Computing ground truth\n");
661 
662             // we need to compute the ground truth first
663             Matrix<size_t> gt_matches(new size_t[testDataset.rows], testDataset.rows, 1);
664             StartStopTimer t;
665             int repeats = 0;
666             t.reset();
667             while (t.value<0.2) {
668             	repeats++;
669                 t.start();
670             	compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
671                 t.stop();
672             }
673             float linear = (float)t.value/repeats;
674 
675             int checks;
676             Logger::info("Estimating number of checks\n");
677 
678             float searchTime;
679             float cb_index;
680             if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
681                 Logger::info("KMeans algorithm, estimating cluster border factor\n");
682                 KMeansIndex<Distance>* kmeans = static_cast<KMeansIndex<Distance>*>(bestIndex_);
683                 float bestSearchTime = -1;
684                 float best_cb_index = -1;
685                 int best_checks = -1;
686                 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
687                     kmeans->set_cb_index(cb_index);
688                     searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
689                     if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
690                         bestSearchTime = searchTime;
691                         best_cb_index = cb_index;
692                         best_checks = checks;
693                     }
694                 }
695                 searchTime = bestSearchTime;
696                 cb_index = best_cb_index;
697                 checks = best_checks;
698 
699                 kmeans->set_cb_index(best_cb_index);
700                 Logger::info("Optimum cb_index: %g\n", cb_index);
701                 bestParams_["cb_index"] = cb_index;
702             }
703             else {
704                 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
705             }
706 
707             Logger::info("Required number of checks: %d \n", checks);
708             searchParams.checks = checks;
709 
710             speedup = linear / searchTime;
711 
712             delete[] gt_matches.ptr();
713             delete[] testDataset.ptr();
714         }
715 
716         return speedup;
717     }
718 
719 
swap(AutotunedIndex & other)720     void swap(AutotunedIndex& other)
721     {
722     	BaseClass::swap(other);
723     	std::swap(bestIndex_, other.bestIndex_);
724     	std::swap(bestParams_, other.bestParams_);
725     	std::swap(bestSearchParams_, other.bestSearchParams_);
726     	std::swap(speedup_, other.speedup_);
727     	std::swap(dataset_, other.dataset_);
728     	std::swap(target_precision_, other.target_precision_);
729     	std::swap(build_weight_, other.build_weight_);
730     	std::swap(memory_weight_, other.memory_weight_);
731     	std::swap(sample_fraction_, other.sample_fraction_);
732     }
733 
734 private:
735     NNIndex<Distance>* bestIndex_;
736 
737     IndexParams bestParams_;
738     SearchParams bestSearchParams_;
739 
740     Matrix<ElementType> sampledDataset_;
741     Matrix<ElementType> testDataset_;
742     Matrix<size_t> gt_matches_;
743 
744     float speedup_;
745 
746     /**
747      * The dataset used by this index
748      */
749     Matrix<ElementType> dataset_;
750 
751     /**
752      * Index parameters
753      */
754     float target_precision_;
755     float build_weight_;
756     float memory_weight_;
757     float sample_fraction_;
758 
759     USING_BASECLASS_SYMBOLS
760 };
761 }
762 
763 #endif /* FLANN_AUTOTUNED_INDEX_H_ */
764