1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 #ifndef FLANN_AUTOTUNED_INDEX_H_
31 #define FLANN_AUTOTUNED_INDEX_H_
32 
33 #include "flann/general.h"
34 #include "flann/algorithms/nn_index.h"
35 #include "flann/nn/ground_truth.h"
36 #include "flann/nn/index_testing.h"
37 #include "flann/util/sampling.h"
38 #include "flann/algorithms/kdtree_index.h"
39 #include "flann/algorithms/kdtree_single_index.h"
40 #include "flann/algorithms/kmeans_index.h"
41 #include "flann/algorithms/composite_index.h"
42 #include "flann/algorithms/linear_index.h"
43 #include "flann/util/logger.h"
44 
45 
46 namespace flann
47 {
48 
49 template<typename Distance>
50 inline NNIndex<Distance>*
51   create_index_by_type(const flann_algorithm_t index_type,
52         const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance = Distance());
53 
54 
55 struct AutotunedIndexParams : public IndexParams
56 {
57     AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
58     {
59         (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
60         // precision desired (used for autotuning, -1 otherwise)
61         (*this)["target_precision"] = target_precision;
62         // build tree time weighting factor
63         (*this)["build_weight"] = build_weight;
64         // index memory weighting factor
65         (*this)["memory_weight"] = memory_weight;
66         // what fraction of the dataset to use for autotuning
67         (*this)["sample_fraction"] = sample_fraction;
68     }
69 };
70 
71 
72 template <typename Distance>
73 class AutotunedIndex : public NNIndex<Distance>
74 {
75 public:
76     typedef typename Distance::ElementType ElementType;
77     typedef typename Distance::ResultType DistanceType;
78 
79     typedef NNIndex<Distance> BaseClass;
80 
81     typedef AutotunedIndex<Distance> IndexType;
82 
83     typedef bool needs_kdtree_distance;
84 
85     AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
BaseClass(params,d)86         BaseClass(params, d), bestIndex_(NULL), speedup_(0), dataset_(inputData)
87     {
88         target_precision_ = get_param(params, "target_precision",0.8f);
89         build_weight_ =  get_param(params,"build_weight", 0.01f);
90         memory_weight_ = get_param(params, "memory_weight", 0.0f);
91         sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
92     }
93 
94     AutotunedIndex(const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
BaseClass(params,d)95         BaseClass(params, d), bestIndex_(NULL), speedup_(0)
96     {
97         target_precision_ = get_param(params, "target_precision",0.8f);
98         build_weight_ =  get_param(params,"build_weight", 0.01f);
99         memory_weight_ = get_param(params, "memory_weight", 0.0f);
100         sample_fraction_ = get_param(params,"sample_fraction", 0.1f);
101     }
102 
AutotunedIndex(const AutotunedIndex & other)103     AutotunedIndex(const AutotunedIndex& other) : BaseClass(other),
104     		bestParams_(other.bestParams_),
105     		bestSearchParams_(other.bestSearchParams_),
106     		speedup_(other.speedup_),
107     		dataset_(other.dataset_),
108     		target_precision_(other.target_precision_),
109     		build_weight_(other.build_weight_),
110     		memory_weight_(other.memory_weight_),
111     		sample_fraction_(other.sample_fraction_)
112     {
113     		bestIndex_ = other.bestIndex_->clone();
114     }
115 
116     AutotunedIndex& operator=(AutotunedIndex other)
117     {
118     	this->swap(other);
119     	return * this;
120     }
121 
~AutotunedIndex()122     virtual ~AutotunedIndex()
123     {
124     	delete bestIndex_;
125     }
126 
clone()127     BaseClass* clone() const
128     {
129     	return new AutotunedIndex(*this);
130     }
131 
132     /**
133      *          Method responsible with building the index.
134      */
buildIndex()135     void buildIndex()
136     {
137         bestParams_ = estimateBuildParams();
138         Logger::info("----------------------------------------------------\n");
139         Logger::info("Autotuned parameters:\n");
140         if (Logger::getLevel()>=FLANN_LOG_INFO)
141         	print_params(bestParams_);
142         Logger::info("----------------------------------------------------\n");
143 
144         flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
145         bestIndex_ = create_index_by_type(index_type, dataset_, bestParams_, distance_);
146         bestIndex_->buildIndex();
147         speedup_ = estimateSearchParams(bestSearchParams_);
148         Logger::info("----------------------------------------------------\n");
149         Logger::info("Search parameters:\n");
150         if (Logger::getLevel()>=FLANN_LOG_INFO)
151         	print_params(bestSearchParams_);
152         Logger::info("----------------------------------------------------\n");
153         bestParams_["search_params"] = bestSearchParams_;
154         bestParams_["speedup"] = speedup_;
155     }
156 
buildIndex(const Matrix<ElementType> & dataset)157     void buildIndex(const Matrix<ElementType>& dataset)
158     {
159     	dataset_ = dataset;
160     	this->buildIndex();
161     }
162 
163 
164     void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2)
165     {
166         if (bestIndex_) {
167             bestIndex_->addPoints(points, rebuild_threshold);
168         }
169     }
170 
removePoint(size_t id)171     void removePoint(size_t id)
172     {
173         if (bestIndex_) {
174             bestIndex_->removePoint(id);
175         }
176     }
177 
178 
179     template<typename Archive>
serialize(Archive & ar)180     void serialize(Archive& ar)
181     {
182     	ar.setObject(this);
183 
184     	ar & *static_cast<NNIndex<Distance>*>(this);
185 
186     	ar & target_precision_;
187     	ar & build_weight_;
188     	ar & memory_weight_;
189     	ar & sample_fraction_;
190 
191     	flann_algorithm_t index_type;
192     	if (Archive::is_saving::value) {
193     		index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
194     	}
195     	ar & index_type;
196     	ar & bestSearchParams_.checks;
197 
198     	if (Archive::is_loading::value) {
199     		bestParams_["algorithm"] = index_type;
200 
201     		index_params_["algorithm"] = getType();
202             index_params_["target_precision_"] = target_precision_;
203             index_params_["build_weight_"] = build_weight_;
204             index_params_["memory_weight_"] = memory_weight_;
205             index_params_["sample_fraction_"] = sample_fraction_;
206     	}
207     }
208 
saveIndex(FILE * stream)209     void saveIndex(FILE* stream)
210     {
211     	serialization::SaveArchive sa(stream);
212     	sa & *this;
213 
214     	bestIndex_->saveIndex(stream);
215     }
216 
loadIndex(FILE * stream)217     void loadIndex(FILE* stream)
218     {
219     	serialization::LoadArchive la(stream);
220     	la & *this;
221 
222         IndexParams params;
223         flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm");
224         bestIndex_ = create_index_by_type<Distance>((flann_algorithm_t)index_type, dataset_, params, distance_);
225         bestIndex_->loadIndex(stream);
226     }
227 
knnSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,size_t knn,const SearchParams & params)228     int knnSearch(const Matrix<ElementType>& queries,
229             Matrix<size_t>& indices,
230             Matrix<DistanceType>& dists,
231             size_t knn,
232             const SearchParams& params) const
233     {
234         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
235             return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_);
236         }
237         else {
238             return bestIndex_->knnSearch(queries, indices, dists, knn, params);
239         }
240     }
241 
knnSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,size_t knn,const SearchParams & params)242     int knnSearch(const Matrix<ElementType>& queries,
243             std::vector< std::vector<size_t> >& indices,
244             std::vector<std::vector<DistanceType> >& dists,
245             size_t knn,
246             const SearchParams& params) const
247     {
248         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
249             return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_);
250         }
251         else {
252             return bestIndex_->knnSearch(queries, indices, dists, knn, params);
253         }
254 
255     }
256 
radiusSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,DistanceType radius,const SearchParams & params)257     int radiusSearch(const Matrix<ElementType>& queries,
258             Matrix<size_t>& indices,
259             Matrix<DistanceType>& dists,
260             DistanceType radius,
261             const SearchParams& params) const
262     {
263         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
264             return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_);
265         }
266         else {
267             return bestIndex_->radiusSearch(queries, indices, dists, radius, params);
268         }
269     }
270 
radiusSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,DistanceType radius,const SearchParams & params)271     int radiusSearch(const Matrix<ElementType>& queries,
272             std::vector< std::vector<size_t> >& indices,
273             std::vector<std::vector<DistanceType> >& dists,
274             DistanceType radius,
275             const SearchParams& params) const
276     {
277         if (params.checks == FLANN_CHECKS_AUTOTUNED) {
278             return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_);
279         }
280         else {
281             return bestIndex_->radiusSearch(queries, indices, dists, radius, params);
282         }
283     }
284 
285 
286 
287     /**
288      *      Method that searches for nearest-neighbors
289      */
findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)290     void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const
291     {
292         // should not get here
293         assert(false);
294     }
295 
getParameters()296     IndexParams getParameters() const
297     {
298         return bestParams_;
299     }
300 
getSearchParameters()301     FLANN_DEPRECATED SearchParams getSearchParameters() const
302     {
303         return bestSearchParams_;
304     }
305 
getSpeedup()306     FLANN_DEPRECATED float getSpeedup() const
307     {
308         return speedup_;
309     }
310 
311 
312     /**
313      *      Number of features in this index.
314      */
size()315     size_t size() const
316     {
317         return bestIndex_->size();
318     }
319 
320     /**
321      *  The length of each vector in this index.
322      */
veclen()323     size_t veclen() const
324     {
325         return bestIndex_->veclen();
326     }
327 
328     /**
329      * The amount of memory (in bytes) this index uses.
330      */
usedMemory()331     int usedMemory() const
332     {
333         return bestIndex_->usedMemory();
334     }
335 
336     /**
337      * Algorithm name
338      */
getType()339     flann_algorithm_t getType() const
340     {
341         return FLANN_INDEX_AUTOTUNED;
342     }
343 
344 protected:
buildIndexImpl()345     void buildIndexImpl()
346     {
347         /* nothing to do here */
348     }
349 
freeIndex()350     void freeIndex()
351     {
352         /* nothing to do here */
353     }
354 
355 private:
356 
357     struct CostData
358     {
359         float searchTimeCost;
360         float buildTimeCost;
361         float memoryCost;
362         float totalCost;
363         IndexParams params;
364     };
365 
evaluate_kmeans(CostData & cost)366     void evaluate_kmeans(CostData& cost)
367     {
368         StartStopTimer t;
369         int checks;
370         const int nn = 1;
371 
372         Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
373                      get_param<int>(cost.params,"iterations"),
374                      get_param<int>(cost.params,"branching"));
375         KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
376         // measure index build time
377         t.start();
378         kmeans.buildIndex();
379         t.stop();
380         float buildTime = (float)t.value;
381 
382         // measure search time
383         float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
384 
385         float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
386         cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
387         cost.searchTimeCost = searchTime;
388         cost.buildTimeCost = buildTime;
389         Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
390     }
391 
392 
evaluate_kdtree(CostData & cost)393     void evaluate_kdtree(CostData& cost)
394     {
395         StartStopTimer t;
396         int checks;
397         const int nn = 1;
398 
399         Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
400         KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
401 
402         t.start();
403         kdtree.buildIndex();
404         t.stop();
405         float buildTime = (float)t.value;
406 
407         //measure search time
408         float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
409 
410         float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
411         cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
412         cost.searchTimeCost = searchTime;
413         cost.buildTimeCost = buildTime;
414         Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
415     }
416 
417 
418     //    struct KMeansSimpleDownhillFunctor {
419     //
420     //        Autotune& autotuner;
421     //        KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {};
422     //
423     //        float operator()(int* params) {
424     //
425     //            float maxFloat = numeric_limits<float>::max();
426     //
427     //            if (params[0]<2) return maxFloat;
428     //            if (params[1]<0) return maxFloat;
429     //
430     //            CostData c;
431     //            c.params["algorithm"] = KMEANS;
432     //            c.params["centers-init"] = CENTERS_RANDOM;
433     //            c.params["branching"] = params[0];
434     //            c.params["max-iterations"] = params[1];
435     //
436     //            autotuner.evaluate_kmeans(c);
437     //
438     //            return c.timeCost;
439     //
440     //        }
441     //    };
442     //
443     //    struct KDTreeSimpleDownhillFunctor {
444     //
445     //        Autotune& autotuner;
446     //        KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {};
447     //
448     //        float operator()(int* params) {
449     //            float maxFloat = numeric_limits<float>::max();
450     //
451     //            if (params[0]<1) return maxFloat;
452     //
453     //            CostData c;
454     //            c.params["algorithm"] = KDTREE;
455     //            c.params["trees"] = params[0];
456     //
457     //            autotuner.evaluate_kdtree(c);
458     //
459     //            return c.timeCost;
460     //
461     //        }
462     //    };
463 
464 
465 
optimizeKMeans(std::vector<CostData> & costs)466     void optimizeKMeans(std::vector<CostData>& costs)
467     {
468         Logger::info("KMEANS, Step 1: Exploring parameter space\n");
469 
470         // explore kmeans parameters space using combinations of the parameters below
471         int maxIterations[] = { 1, 5, 10, 15 };
472         int branchingFactors[] = { 16, 32, 64, 128, 256 };
473 
474         int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
475         costs.reserve(costs.size() + kmeansParamSpaceSize);
476 
477         // evaluate kmeans for all parameter combinations
478         for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
479             for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
480                 CostData cost;
481                 cost.params["algorithm"] = FLANN_INDEX_KMEANS;
482                 cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
483                 cost.params["iterations"] = maxIterations[i];
484                 cost.params["branching"] = branchingFactors[j];
485 
486                 evaluate_kmeans(cost);
487                 costs.push_back(cost);
488             }
489         }
490 
491         //         Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
492         //
493         //         const int n = 2;
494         //         // choose initial simplex points as the best parameters so far
495         //         int kmeansNMPoints[n*(n+1)];
496         //         float kmeansVals[n+1];
497         //         for (int i=0;i<n+1;++i) {
498         //             kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
499         //             kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
500         //             kmeansVals[i] = kmeansCosts[i].timeCost;
501         //         }
502         //         KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
503         //         // run optimization
504         //         optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
505         //         // store results
506         //         for (int i=0;i<n+1;++i) {
507         //             kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
508         //             kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
509         //             kmeansCosts[i].timeCost = kmeansVals[i];
510         //         }
511     }
512 
513 
optimizeKDTree(std::vector<CostData> & costs)514     void optimizeKDTree(std::vector<CostData>& costs)
515     {
516         Logger::info("KD-TREE, Step 1: Exploring parameter space\n");
517 
518         // explore kd-tree parameters space using the parameters below
519         int testTrees[] = { 1, 4, 8, 16, 32 };
520 
521         // evaluate kdtree for all parameter combinations
522         for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
523             CostData cost;
524             cost.params["algorithm"] = FLANN_INDEX_KDTREE;
525             cost.params["trees"] = testTrees[i];
526 
527             evaluate_kdtree(cost);
528             costs.push_back(cost);
529         }
530 
531         //         Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
532         //
533         //         const int n = 1;
534         //         // choose initial simplex points as the best parameters so far
535         //         int kdtreeNMPoints[n*(n+1)];
536         //         float kdtreeVals[n+1];
537         //         for (int i=0;i<n+1;++i) {
538         //             kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
539         //             kdtreeVals[i] = kdtreeCosts[i].timeCost;
540         //         }
541         //         KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
542         //         // run optimization
543         //         optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
544         //         // store results
545         //         for (int i=0;i<n+1;++i) {
546         //             kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
547         //             kdtreeCosts[i].timeCost = kdtreeVals[i];
548         //         }
549     }
550 
551     /**
552      *  Chooses the best nearest-neighbor algorithm and estimates the optimal
553      *  parameters to use when building the index (for a given precision).
554      *  Returns a dictionary with the optimal parameters.
555      */
estimateBuildParams()556     IndexParams estimateBuildParams()
557     {
558         std::vector<CostData> costs;
559 
560         int sampleSize = int(sample_fraction_ * dataset_.rows);
561         int testSampleSize = std::min(sampleSize / 10, 1000);
562 
563         Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
564 
565         // For a very small dataset, it makes no sense to build any fancy index, just
566         // use linear search
567         if (testSampleSize < 10) {
568             Logger::info("Choosing linear, dataset too small\n");
569             return LinearIndexParams();
570         }
571 
572         // We use a fraction of the original dataset to speedup the autotune algorithm
573         sampledDataset_ = random_sample(dataset_, sampleSize);
574         // We use a cross-validation approach, first we sample a testset from the dataset
575         testDataset_ = random_sample(sampledDataset_, testSampleSize, true);
576 
577         // We compute the ground truth using linear search
578         Logger::info("Computing ground truth... \n");
579         gt_matches_ = Matrix<size_t>(new size_t[testDataset_.rows], testDataset_.rows, 1);
580         StartStopTimer t;
581         int repeats = 0;
582         t.reset();
583         while (t.value<0.2) {
584         	repeats++;
585             t.start();
586         	compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
587             t.stop();
588         }
589 
590         CostData linear_cost;
591         linear_cost.searchTimeCost = (float)t.value/repeats;
592         linear_cost.buildTimeCost = 0;
593         linear_cost.memoryCost = 0;
594         linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;
595 
596         costs.push_back(linear_cost);
597 
598         // Start parameter autotune process
599         Logger::info("Autotuning parameters...\n");
600 
601         optimizeKMeans(costs);
602         optimizeKDTree(costs);
603 
604         float bestTimeCost = costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost;
605         for (size_t i = 0; i < costs.size(); ++i) {
606             float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
607             Logger::debug("Time cost: %g\n", timeCost);
608             if (timeCost < bestTimeCost) {
609                 bestTimeCost = timeCost;
610             }
611         }
612         Logger::debug("Best time cost: %g\n", bestTimeCost);
613 
614     	IndexParams bestParams = costs[0].params;
615         if (bestTimeCost > 0) {
616         	float bestCost = (costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost) / bestTimeCost;
617         	for (size_t i = 0; i < costs.size(); ++i) {
618         		float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
619         				memory_weight_ * costs[i].memoryCost;
620         		Logger::debug("Cost: %g\n", crtCost);
621         		if (crtCost < bestCost) {
622         			bestCost = crtCost;
623         			bestParams = costs[i].params;
624         		}
625         	}
626             Logger::debug("Best cost: %g\n", bestCost);
627         }
628 
629         delete[] gt_matches_.ptr();
630         delete[] testDataset_.ptr();
631         delete[] sampledDataset_.ptr();
632 
633         return bestParams;
634     }
635 
636 
637 
638     /**
639      *  Estimates the search time parameters needed to get the desired precision.
640      *  Precondition: the index is built
641      *  Postcondition: the searchParams will have the optimum params set, also the speedup obtained over linear search.
642      */
estimateSearchParams(SearchParams & searchParams)643     float estimateSearchParams(SearchParams& searchParams)
644     {
645         const int nn = 1;
646         const size_t SAMPLE_COUNT = 1000;
647 
648         assert(bestIndex_ != NULL); // must have a valid index
649 
650         float speedup = 0;
651 
652         int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
653         if (samples > 0) {
654             Matrix<ElementType> testDataset = random_sample(dataset_, samples);
655 
656             Logger::info("Computing ground truth\n");
657 
658             // we need to compute the ground truth first
659             Matrix<size_t> gt_matches(new size_t[testDataset.rows], testDataset.rows, 1);
660             StartStopTimer t;
661             int repeats = 0;
662             t.reset();
663             while (t.value<0.2) {
664             	repeats++;
665                 t.start();
666             	compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
667                 t.stop();
668             }
669             float linear = (float)t.value/repeats;
670 
671             int checks;
672             Logger::info("Estimating number of checks\n");
673 
674             float searchTime;
675             float cb_index;
676             if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
677                 Logger::info("KMeans algorithm, estimating cluster border factor\n");
678                 KMeansIndex<Distance>* kmeans = static_cast<KMeansIndex<Distance>*>(bestIndex_);
679                 float bestSearchTime = -1;
680                 float best_cb_index = -1;
681                 int best_checks = -1;
682                 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
683                     kmeans->set_cb_index(cb_index);
684                     searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
685                     if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
686                         bestSearchTime = searchTime;
687                         best_cb_index = cb_index;
688                         best_checks = checks;
689                     }
690                 }
691                 searchTime = bestSearchTime;
692                 cb_index = best_cb_index;
693                 checks = best_checks;
694 
695                 kmeans->set_cb_index(best_cb_index);
696                 Logger::info("Optimum cb_index: %g\n", cb_index);
697                 bestParams_["cb_index"] = cb_index;
698             }
699             else {
700                 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
701             }
702 
703             Logger::info("Required number of checks: %d \n", checks);
704             searchParams.checks = checks;
705 
706             speedup = linear / searchTime;
707 
708             delete[] gt_matches.ptr();
709             delete[] testDataset.ptr();
710         }
711 
712         return speedup;
713     }
714 
715 
swap(AutotunedIndex & other)716     void swap(AutotunedIndex& other)
717     {
718     	BaseClass::swap(other);
719     	std::swap(bestIndex_, other.bestIndex_);
720     	std::swap(bestParams_, other.bestParams_);
721     	std::swap(bestSearchParams_, other.bestSearchParams_);
722     	std::swap(speedup_, other.speedup_);
723     	std::swap(dataset_, other.dataset_);
724     	std::swap(target_precision_, other.target_precision_);
725     	std::swap(build_weight_, other.build_weight_);
726     	std::swap(memory_weight_, other.memory_weight_);
727     	std::swap(sample_fraction_, other.sample_fraction_);
728     }
729 
730 private:
731     NNIndex<Distance>* bestIndex_;
732 
733     IndexParams bestParams_;
734     SearchParams bestSearchParams_;
735 
736     Matrix<ElementType> sampledDataset_;
737     Matrix<ElementType> testDataset_;
738     Matrix<size_t> gt_matches_;
739 
740     float speedup_;
741 
742     /**
743      * The dataset used by this index
744      */
745     Matrix<ElementType> dataset_;
746 
747     /**
748      * Index parameters
749      */
750     float target_precision_;
751     float build_weight_;
752     float memory_weight_;
753     float sample_fraction_;
754 
755     USING_BASECLASS_SYMBOLS
756 };
757 }
758 
759 #endif /* FLANN_AUTOTUNED_INDEX_H_ */
760