1 /*********************************************************************** 2 * Software License Agreement (BSD License) 3 * 4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 6 * 7 * THE BSD LICENSE 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 *************************************************************************/ 30 #ifndef FLANN_AUTOTUNED_INDEX_H_ 31 #define FLANN_AUTOTUNED_INDEX_H_ 32 33 #include "flann/general.h" 34 #include "flann/algorithms/nn_index.h" 35 #include "flann/nn/ground_truth.h" 36 #include "flann/nn/index_testing.h" 37 #include "flann/util/sampling.h" 38 #include "flann/algorithms/kdtree_index.h" 39 #include "flann/algorithms/kdtree_single_index.h" 40 #include "flann/algorithms/kmeans_index.h" 41 #include "flann/algorithms/composite_index.h" 42 #include "flann/algorithms/linear_index.h" 43 #include "flann/util/logger.h" 44 45 46 namespace flann 47 { 48 49 template<typename Distance> 50 inline NNIndex<Distance>* 51 create_index_by_type(const flann_algorithm_t index_type, 52 const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance = Distance()); 53 54 55 struct AutotunedIndexParams : public IndexParams 56 { 57 AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1) 58 { 59 (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED; 60 // precision desired (used for autotuning, -1 otherwise) 61 (*this)["target_precision"] = target_precision; 62 // build tree time weighting factor 63 (*this)["build_weight"] = build_weight; 64 // index memory weighting factor 65 (*this)["memory_weight"] = memory_weight; 66 // what fraction of the dataset to use for autotuning 67 (*this)["sample_fraction"] = sample_fraction; 68 } 69 }; 70 71 72 template <typename Distance> 73 class AutotunedIndex : public NNIndex<Distance> 74 { 75 public: 76 typedef typename Distance::ElementType ElementType; 77 typedef typename Distance::ResultType DistanceType; 78 79 typedef NNIndex<Distance> BaseClass; 80 81 typedef AutotunedIndex<Distance> IndexType; 82 83 typedef bool needs_kdtree_distance; 84 85 AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) : BaseClass(params,d)86 BaseClass(params, d), bestIndex_(NULL), speedup_(0), dataset_(inputData) 87 { 88 target_precision_ = get_param(params, "target_precision",0.8f); 89 build_weight_ = get_param(params,"build_weight", 0.01f); 90 memory_weight_ = get_param(params, "memory_weight", 0.0f); 91 sample_fraction_ = get_param(params,"sample_fraction", 0.1f); 92 } 93 94 AutotunedIndex(const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) : BaseClass(params,d)95 BaseClass(params, d), bestIndex_(NULL), speedup_(0) 96 { 97 target_precision_ = get_param(params, "target_precision",0.8f); 98 build_weight_ = get_param(params,"build_weight", 0.01f); 99 memory_weight_ = get_param(params, "memory_weight", 0.0f); 100 sample_fraction_ = get_param(params,"sample_fraction", 0.1f); 101 } 102 AutotunedIndex(const AutotunedIndex & other)103 AutotunedIndex(const AutotunedIndex& other) : BaseClass(other), 104 bestParams_(other.bestParams_), 105 bestSearchParams_(other.bestSearchParams_), 106 speedup_(other.speedup_), 107 dataset_(other.dataset_), 108 target_precision_(other.target_precision_), 109 build_weight_(other.build_weight_), 110 memory_weight_(other.memory_weight_), 111 sample_fraction_(other.sample_fraction_) 112 { 113 bestIndex_ = other.bestIndex_->clone(); 114 } 115 116 AutotunedIndex& operator=(AutotunedIndex other) 117 { 118 this->swap(other); 119 return * this; 120 } 121 ~AutotunedIndex()122 virtual ~AutotunedIndex() 123 { 124 delete bestIndex_; 125 } 126 clone()127 BaseClass* clone() const 128 { 129 return new AutotunedIndex(*this); 130 } 131 132 /** 133 * Method responsible with building the index. 134 */ buildIndex()135 void buildIndex() 136 { 137 bestParams_ = estimateBuildParams(); 138 Logger::info("----------------------------------------------------\n"); 139 Logger::info("Autotuned parameters:\n"); 140 if (Logger::getLevel()>=FLANN_LOG_INFO) 141 print_params(bestParams_); 142 Logger::info("----------------------------------------------------\n"); 143 144 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm"); 145 bestIndex_ = create_index_by_type(index_type, dataset_, bestParams_, distance_); 146 bestIndex_->buildIndex(); 147 speedup_ = estimateSearchParams(bestSearchParams_); 148 Logger::info("----------------------------------------------------\n"); 149 Logger::info("Search parameters:\n"); 150 if (Logger::getLevel()>=FLANN_LOG_INFO) 151 print_params(bestSearchParams_); 152 Logger::info("----------------------------------------------------\n"); 153 bestParams_["search_params"] = bestSearchParams_; 154 bestParams_["speedup"] = speedup_; 155 } 156 buildIndex(const Matrix<ElementType> & dataset)157 void buildIndex(const Matrix<ElementType>& dataset) 158 { 159 dataset_ = dataset; 160 this->buildIndex(); 161 } 162 163 164 void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2) 165 { 166 if (bestIndex_) { 167 bestIndex_->addPoints(points, rebuild_threshold); 168 } 169 } 170 removePoint(size_t id)171 void removePoint(size_t id) 172 { 173 if (bestIndex_) { 174 bestIndex_->removePoint(id); 175 } 176 } 177 178 179 template<typename Archive> serialize(Archive & ar)180 void serialize(Archive& ar) 181 { 182 ar.setObject(this); 183 184 ar & *static_cast<NNIndex<Distance>*>(this); 185 186 ar & target_precision_; 187 ar & build_weight_; 188 ar & memory_weight_; 189 ar & sample_fraction_; 190 191 flann_algorithm_t index_type; 192 if (Archive::is_saving::value) { 193 index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm"); 194 } 195 ar & index_type; 196 ar & bestSearchParams_.checks; 197 198 if (Archive::is_loading::value) { 199 bestParams_["algorithm"] = index_type; 200 201 index_params_["algorithm"] = getType(); 202 index_params_["target_precision_"] = target_precision_; 203 index_params_["build_weight_"] = build_weight_; 204 index_params_["memory_weight_"] = memory_weight_; 205 index_params_["sample_fraction_"] = sample_fraction_; 206 } 207 } 208 saveIndex(FILE * stream)209 void saveIndex(FILE* stream) 210 { 211 { 212 serialization::SaveArchive sa(stream); 213 sa & *this; 214 } 215 216 bestIndex_->saveIndex(stream); 217 } 218 loadIndex(FILE * stream)219 void loadIndex(FILE* stream) 220 { 221 { 222 serialization::LoadArchive la(stream); 223 la & *this; 224 } 225 226 IndexParams params; 227 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm"); 228 bestIndex_ = create_index_by_type<Distance>((flann_algorithm_t)index_type, dataset_, params, distance_); 229 bestIndex_->loadIndex(stream); 230 } 231 knnSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,size_t knn,const SearchParams & params)232 int knnSearch(const Matrix<ElementType>& queries, 233 Matrix<size_t>& indices, 234 Matrix<DistanceType>& dists, 235 size_t knn, 236 const SearchParams& params) const 237 { 238 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 239 return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_); 240 } 241 else { 242 return bestIndex_->knnSearch(queries, indices, dists, knn, params); 243 } 244 } 245 knnSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,size_t knn,const SearchParams & params)246 int knnSearch(const Matrix<ElementType>& queries, 247 std::vector< std::vector<size_t> >& indices, 248 std::vector<std::vector<DistanceType> >& dists, 249 size_t knn, 250 const SearchParams& params) const 251 { 252 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 253 return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_); 254 } 255 else { 256 return bestIndex_->knnSearch(queries, indices, dists, knn, params); 257 } 258 259 } 260 radiusSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,DistanceType radius,const SearchParams & params)261 int radiusSearch(const Matrix<ElementType>& queries, 262 Matrix<size_t>& indices, 263 Matrix<DistanceType>& dists, 264 DistanceType radius, 265 const SearchParams& params) const 266 { 267 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 268 return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_); 269 } 270 else { 271 return bestIndex_->radiusSearch(queries, indices, dists, radius, params); 272 } 273 } 274 radiusSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,DistanceType radius,const SearchParams & params)275 int radiusSearch(const Matrix<ElementType>& queries, 276 std::vector< std::vector<size_t> >& indices, 277 std::vector<std::vector<DistanceType> >& dists, 278 DistanceType radius, 279 const SearchParams& params) const 280 { 281 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 282 return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_); 283 } 284 else { 285 return bestIndex_->radiusSearch(queries, indices, dists, radius, params); 286 } 287 } 288 289 290 291 /** 292 * Method that searches for nearest-neighbors 293 */ findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)294 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const 295 { 296 // should not get here 297 assert(false); 298 } 299 getParameters()300 IndexParams getParameters() const 301 { 302 return bestParams_; 303 } 304 getSearchParameters()305 FLANN_DEPRECATED SearchParams getSearchParameters() const 306 { 307 return bestSearchParams_; 308 } 309 getSpeedup()310 FLANN_DEPRECATED float getSpeedup() const 311 { 312 return speedup_; 313 } 314 315 316 /** 317 * Number of features in this index. 318 */ size()319 size_t size() const 320 { 321 return bestIndex_->size(); 322 } 323 324 /** 325 * The length of each vector in this index. 326 */ veclen()327 size_t veclen() const 328 { 329 return bestIndex_->veclen(); 330 } 331 332 /** 333 * The amount of memory (in bytes) this index uses. 334 */ usedMemory()335 int usedMemory() const 336 { 337 return bestIndex_->usedMemory(); 338 } 339 340 /** 341 * Algorithm name 342 */ getType()343 flann_algorithm_t getType() const 344 { 345 return FLANN_INDEX_AUTOTUNED; 346 } 347 348 protected: buildIndexImpl()349 void buildIndexImpl() 350 { 351 /* nothing to do here */ 352 } 353 freeIndex()354 void freeIndex() 355 { 356 /* nothing to do here */ 357 } 358 359 private: 360 361 struct CostData 362 { 363 float searchTimeCost; 364 float buildTimeCost; 365 float memoryCost; 366 float totalCost; 367 IndexParams params; 368 }; 369 evaluate_kmeans(CostData & cost)370 void evaluate_kmeans(CostData& cost) 371 { 372 StartStopTimer t; 373 int checks; 374 const int nn = 1; 375 376 Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n", 377 get_param<int>(cost.params,"iterations"), 378 get_param<int>(cost.params,"branching")); 379 KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_); 380 // measure index build time 381 t.start(); 382 kmeans.buildIndex(); 383 t.stop(); 384 float buildTime = (float)t.value; 385 386 // measure search time 387 float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn); 388 389 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float)); 390 cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory; 391 cost.searchTimeCost = searchTime; 392 cost.buildTimeCost = buildTime; 393 Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_); 394 } 395 396 evaluate_kdtree(CostData & cost)397 void evaluate_kdtree(CostData& cost) 398 { 399 StartStopTimer t; 400 int checks; 401 const int nn = 1; 402 403 Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees")); 404 KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_); 405 406 t.start(); 407 kdtree.buildIndex(); 408 t.stop(); 409 float buildTime = (float)t.value; 410 411 //measure search time 412 float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn); 413 414 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float)); 415 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory; 416 cost.searchTimeCost = searchTime; 417 cost.buildTimeCost = buildTime; 418 Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime); 419 } 420 421 422 // struct KMeansSimpleDownhillFunctor { 423 // 424 // Autotune& autotuner; 425 // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}; 426 // 427 // float operator()(int* params) { 428 // 429 // float maxFloat = numeric_limits<float>::max(); 430 // 431 // if (params[0]<2) return maxFloat; 432 // if (params[1]<0) return maxFloat; 433 // 434 // CostData c; 435 // c.params["algorithm"] = KMEANS; 436 // c.params["centers-init"] = CENTERS_RANDOM; 437 // c.params["branching"] = params[0]; 438 // c.params["max-iterations"] = params[1]; 439 // 440 // autotuner.evaluate_kmeans(c); 441 // 442 // return c.timeCost; 443 // 444 // } 445 // }; 446 // 447 // struct KDTreeSimpleDownhillFunctor { 448 // 449 // Autotune& autotuner; 450 // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}; 451 // 452 // float operator()(int* params) { 453 // float maxFloat = numeric_limits<float>::max(); 454 // 455 // if (params[0]<1) return maxFloat; 456 // 457 // CostData c; 458 // c.params["algorithm"] = KDTREE; 459 // c.params["trees"] = params[0]; 460 // 461 // autotuner.evaluate_kdtree(c); 462 // 463 // return c.timeCost; 464 // 465 // } 466 // }; 467 468 469 optimizeKMeans(std::vector<CostData> & costs)470 void optimizeKMeans(std::vector<CostData>& costs) 471 { 472 Logger::info("KMEANS, Step 1: Exploring parameter space\n"); 473 474 // explore kmeans parameters space using combinations of the parameters below 475 int maxIterations[] = { 1, 5, 10, 15 }; 476 int branchingFactors[] = { 16, 32, 64, 128, 256 }; 477 478 int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors); 479 costs.reserve(costs.size() + kmeansParamSpaceSize); 480 481 // evaluate kmeans for all parameter combinations 482 for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) { 483 for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) { 484 CostData cost; 485 cost.params["algorithm"] = FLANN_INDEX_KMEANS; 486 cost.params["centers_init"] = FLANN_CENTERS_RANDOM; 487 cost.params["iterations"] = maxIterations[i]; 488 cost.params["branching"] = branchingFactors[j]; 489 490 evaluate_kmeans(cost); 491 costs.push_back(cost); 492 } 493 } 494 495 // Logger::info("KMEANS, Step 2: simplex-downhill optimization\n"); 496 // 497 // const int n = 2; 498 // // choose initial simplex points as the best parameters so far 499 // int kmeansNMPoints[n*(n+1)]; 500 // float kmeansVals[n+1]; 501 // for (int i=0;i<n+1;++i) { 502 // kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"]; 503 // kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"]; 504 // kmeansVals[i] = kmeansCosts[i].timeCost; 505 // } 506 // KMeansSimpleDownhillFunctor kmeans_cost_func(*this); 507 // // run optimization 508 // optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals); 509 // // store results 510 // for (int i=0;i<n+1;++i) { 511 // kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2]; 512 // kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1]; 513 // kmeansCosts[i].timeCost = kmeansVals[i]; 514 // } 515 } 516 517 optimizeKDTree(std::vector<CostData> & costs)518 void optimizeKDTree(std::vector<CostData>& costs) 519 { 520 Logger::info("KD-TREE, Step 1: Exploring parameter space\n"); 521 522 // explore kd-tree parameters space using the parameters below 523 int testTrees[] = { 1, 4, 8, 16, 32 }; 524 525 // evaluate kdtree for all parameter combinations 526 for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) { 527 CostData cost; 528 cost.params["algorithm"] = FLANN_INDEX_KDTREE; 529 cost.params["trees"] = testTrees[i]; 530 531 evaluate_kdtree(cost); 532 costs.push_back(cost); 533 } 534 535 // Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n"); 536 // 537 // const int n = 1; 538 // // choose initial simplex points as the best parameters so far 539 // int kdtreeNMPoints[n*(n+1)]; 540 // float kdtreeVals[n+1]; 541 // for (int i=0;i<n+1;++i) { 542 // kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"]; 543 // kdtreeVals[i] = kdtreeCosts[i].timeCost; 544 // } 545 // KDTreeSimpleDownhillFunctor kdtree_cost_func(*this); 546 // // run optimization 547 // optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals); 548 // // store results 549 // for (int i=0;i<n+1;++i) { 550 // kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i]; 551 // kdtreeCosts[i].timeCost = kdtreeVals[i]; 552 // } 553 } 554 555 /** 556 * Chooses the best nearest-neighbor algorithm and estimates the optimal 557 * parameters to use when building the index (for a given precision). 558 * Returns a dictionary with the optimal parameters. 559 */ estimateBuildParams()560 IndexParams estimateBuildParams() 561 { 562 std::vector<CostData> costs; 563 564 int sampleSize = int(sample_fraction_ * dataset_.rows); 565 int testSampleSize = std::min(sampleSize / 10, 1000); 566 567 Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_); 568 569 // For a very small dataset, it makes no sense to build any fancy index, just 570 // use linear search 571 if (testSampleSize < 10) { 572 Logger::info("Choosing linear, dataset too small\n"); 573 return LinearIndexParams(); 574 } 575 576 // We use a fraction of the original dataset to speedup the autotune algorithm 577 sampledDataset_ = random_sample(dataset_, sampleSize); 578 // We use a cross-validation approach, first we sample a testset from the dataset 579 testDataset_ = random_sample(sampledDataset_, testSampleSize, true); 580 581 // We compute the ground truth using linear search 582 Logger::info("Computing ground truth... \n"); 583 gt_matches_ = Matrix<size_t>(new size_t[testDataset_.rows], testDataset_.rows, 1); 584 StartStopTimer t; 585 int repeats = 0; 586 t.reset(); 587 while (t.value<0.2) { 588 repeats++; 589 t.start(); 590 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_); 591 t.stop(); 592 } 593 594 CostData linear_cost; 595 linear_cost.searchTimeCost = (float)t.value/repeats; 596 linear_cost.buildTimeCost = 0; 597 linear_cost.memoryCost = 0; 598 linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR; 599 600 costs.push_back(linear_cost); 601 602 // Start parameter autotune process 603 Logger::info("Autotuning parameters...\n"); 604 605 optimizeKMeans(costs); 606 optimizeKDTree(costs); 607 608 float bestTimeCost = costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost; 609 for (size_t i = 0; i < costs.size(); ++i) { 610 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost; 611 Logger::debug("Time cost: %g\n", timeCost); 612 if (timeCost < bestTimeCost) { 613 bestTimeCost = timeCost; 614 } 615 } 616 Logger::debug("Best time cost: %g\n", bestTimeCost); 617 618 IndexParams bestParams = costs[0].params; 619 if (bestTimeCost > 0) { 620 float bestCost = (costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost) / bestTimeCost; 621 for (size_t i = 0; i < costs.size(); ++i) { 622 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost + 623 memory_weight_ * costs[i].memoryCost; 624 Logger::debug("Cost: %g\n", crtCost); 625 if (crtCost < bestCost) { 626 bestCost = crtCost; 627 bestParams = costs[i].params; 628 } 629 } 630 Logger::debug("Best cost: %g\n", bestCost); 631 } 632 633 delete[] gt_matches_.ptr(); 634 delete[] testDataset_.ptr(); 635 delete[] sampledDataset_.ptr(); 636 637 return bestParams; 638 } 639 640 641 642 /** 643 * Estimates the search time parameters needed to get the desired precision. 644 * Precondition: the index is built 645 * Postcondition: the searchParams will have the optimum params set, also the speedup obtained over linear search. 646 */ estimateSearchParams(SearchParams & searchParams)647 float estimateSearchParams(SearchParams& searchParams) 648 { 649 const int nn = 1; 650 const size_t SAMPLE_COUNT = 1000; 651 652 assert(bestIndex_ != NULL); // must have a valid index 653 654 float speedup = 0; 655 656 int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT); 657 if (samples > 0) { 658 Matrix<ElementType> testDataset = random_sample(dataset_, samples); 659 660 Logger::info("Computing ground truth\n"); 661 662 // we need to compute the ground truth first 663 Matrix<size_t> gt_matches(new size_t[testDataset.rows], testDataset.rows, 1); 664 StartStopTimer t; 665 int repeats = 0; 666 t.reset(); 667 while (t.value<0.2) { 668 repeats++; 669 t.start(); 670 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_); 671 t.stop(); 672 } 673 float linear = (float)t.value/repeats; 674 675 int checks; 676 Logger::info("Estimating number of checks\n"); 677 678 float searchTime; 679 float cb_index; 680 if (bestIndex_->getType() == FLANN_INDEX_KMEANS) { 681 Logger::info("KMeans algorithm, estimating cluster border factor\n"); 682 KMeansIndex<Distance>* kmeans = static_cast<KMeansIndex<Distance>*>(bestIndex_); 683 float bestSearchTime = -1; 684 float best_cb_index = -1; 685 int best_checks = -1; 686 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) { 687 kmeans->set_cb_index(cb_index); 688 searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1); 689 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) { 690 bestSearchTime = searchTime; 691 best_cb_index = cb_index; 692 best_checks = checks; 693 } 694 } 695 searchTime = bestSearchTime; 696 cb_index = best_cb_index; 697 checks = best_checks; 698 699 kmeans->set_cb_index(best_cb_index); 700 Logger::info("Optimum cb_index: %g\n", cb_index); 701 bestParams_["cb_index"] = cb_index; 702 } 703 else { 704 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1); 705 } 706 707 Logger::info("Required number of checks: %d \n", checks); 708 searchParams.checks = checks; 709 710 speedup = linear / searchTime; 711 712 delete[] gt_matches.ptr(); 713 delete[] testDataset.ptr(); 714 } 715 716 return speedup; 717 } 718 719 swap(AutotunedIndex & other)720 void swap(AutotunedIndex& other) 721 { 722 BaseClass::swap(other); 723 std::swap(bestIndex_, other.bestIndex_); 724 std::swap(bestParams_, other.bestParams_); 725 std::swap(bestSearchParams_, other.bestSearchParams_); 726 std::swap(speedup_, other.speedup_); 727 std::swap(dataset_, other.dataset_); 728 std::swap(target_precision_, other.target_precision_); 729 std::swap(build_weight_, other.build_weight_); 730 std::swap(memory_weight_, other.memory_weight_); 731 std::swap(sample_fraction_, other.sample_fraction_); 732 } 733 734 private: 735 NNIndex<Distance>* bestIndex_; 736 737 IndexParams bestParams_; 738 SearchParams bestSearchParams_; 739 740 Matrix<ElementType> sampledDataset_; 741 Matrix<ElementType> testDataset_; 742 Matrix<size_t> gt_matches_; 743 744 float speedup_; 745 746 /** 747 * The dataset used by this index 748 */ 749 Matrix<ElementType> dataset_; 750 751 /** 752 * Index parameters 753 */ 754 float target_precision_; 755 float build_weight_; 756 float memory_weight_; 757 float sample_fraction_; 758 759 USING_BASECLASS_SYMBOLS 760 }; 761 } 762 763 #endif /* FLANN_AUTOTUNED_INDEX_H_ */ 764