1 /*********************************************************************** 2 * Software License Agreement (BSD License) 3 * 4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 6 * 7 * THE BSD LICENSE 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 *************************************************************************/ 30 #ifndef FLANN_AUTOTUNED_INDEX_H_ 31 #define FLANN_AUTOTUNED_INDEX_H_ 32 33 #include "flann/general.h" 34 #include "flann/algorithms/nn_index.h" 35 #include "flann/nn/ground_truth.h" 36 #include "flann/nn/index_testing.h" 37 #include "flann/util/sampling.h" 38 #include "flann/algorithms/kdtree_index.h" 39 #include "flann/algorithms/kdtree_single_index.h" 40 #include "flann/algorithms/kmeans_index.h" 41 #include "flann/algorithms/composite_index.h" 42 #include "flann/algorithms/linear_index.h" 43 #include "flann/util/logger.h" 44 45 46 namespace flann 47 { 48 49 template<typename Distance> 50 inline NNIndex<Distance>* 51 create_index_by_type(const flann_algorithm_t index_type, 52 const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance = Distance()); 53 54 55 struct AutotunedIndexParams : public IndexParams 56 { 57 AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1) 58 { 59 (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED; 60 // precision desired (used for autotuning, -1 otherwise) 61 (*this)["target_precision"] = target_precision; 62 // build tree time weighting factor 63 (*this)["build_weight"] = build_weight; 64 // index memory weighting factor 65 (*this)["memory_weight"] = memory_weight; 66 // what fraction of the dataset to use for autotuning 67 (*this)["sample_fraction"] = sample_fraction; 68 } 69 }; 70 71 72 template <typename Distance> 73 class AutotunedIndex : public NNIndex<Distance> 74 { 75 public: 76 typedef typename Distance::ElementType ElementType; 77 typedef typename Distance::ResultType DistanceType; 78 79 typedef NNIndex<Distance> BaseClass; 80 81 typedef AutotunedIndex<Distance> IndexType; 82 83 typedef bool needs_kdtree_distance; 84 85 AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) : BaseClass(params,d)86 BaseClass(params, d), bestIndex_(NULL), speedup_(0), dataset_(inputData) 87 { 88 target_precision_ = get_param(params, "target_precision",0.8f); 89 build_weight_ = get_param(params,"build_weight", 0.01f); 90 memory_weight_ = get_param(params, "memory_weight", 0.0f); 91 sample_fraction_ = get_param(params,"sample_fraction", 0.1f); 92 } 93 94 AutotunedIndex(const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) : BaseClass(params,d)95 BaseClass(params, d), bestIndex_(NULL), speedup_(0) 96 { 97 target_precision_ = get_param(params, "target_precision",0.8f); 98 build_weight_ = get_param(params,"build_weight", 0.01f); 99 memory_weight_ = get_param(params, "memory_weight", 0.0f); 100 sample_fraction_ = get_param(params,"sample_fraction", 0.1f); 101 } 102 AutotunedIndex(const AutotunedIndex & other)103 AutotunedIndex(const AutotunedIndex& other) : BaseClass(other), 104 bestParams_(other.bestParams_), 105 bestSearchParams_(other.bestSearchParams_), 106 speedup_(other.speedup_), 107 dataset_(other.dataset_), 108 target_precision_(other.target_precision_), 109 build_weight_(other.build_weight_), 110 memory_weight_(other.memory_weight_), 111 sample_fraction_(other.sample_fraction_) 112 { 113 bestIndex_ = other.bestIndex_->clone(); 114 } 115 116 AutotunedIndex& operator=(AutotunedIndex other) 117 { 118 this->swap(other); 119 return * this; 120 } 121 ~AutotunedIndex()122 virtual ~AutotunedIndex() 123 { 124 delete bestIndex_; 125 } 126 clone()127 BaseClass* clone() const 128 { 129 return new AutotunedIndex(*this); 130 } 131 132 /** 133 * Method responsible with building the index. 134 */ buildIndex()135 void buildIndex() 136 { 137 bestParams_ = estimateBuildParams(); 138 Logger::info("----------------------------------------------------\n"); 139 Logger::info("Autotuned parameters:\n"); 140 if (Logger::getLevel()>=FLANN_LOG_INFO) 141 print_params(bestParams_); 142 Logger::info("----------------------------------------------------\n"); 143 144 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm"); 145 bestIndex_ = create_index_by_type(index_type, dataset_, bestParams_, distance_); 146 bestIndex_->buildIndex(); 147 speedup_ = estimateSearchParams(bestSearchParams_); 148 Logger::info("----------------------------------------------------\n"); 149 Logger::info("Search parameters:\n"); 150 if (Logger::getLevel()>=FLANN_LOG_INFO) 151 print_params(bestSearchParams_); 152 Logger::info("----------------------------------------------------\n"); 153 bestParams_["search_params"] = bestSearchParams_; 154 bestParams_["speedup"] = speedup_; 155 } 156 buildIndex(const Matrix<ElementType> & dataset)157 void buildIndex(const Matrix<ElementType>& dataset) 158 { 159 dataset_ = dataset; 160 this->buildIndex(); 161 } 162 163 164 void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2) 165 { 166 if (bestIndex_) { 167 bestIndex_->addPoints(points, rebuild_threshold); 168 } 169 } 170 removePoint(size_t id)171 void removePoint(size_t id) 172 { 173 if (bestIndex_) { 174 bestIndex_->removePoint(id); 175 } 176 } 177 178 179 template<typename Archive> serialize(Archive & ar)180 void serialize(Archive& ar) 181 { 182 ar.setObject(this); 183 184 ar & *static_cast<NNIndex<Distance>*>(this); 185 186 ar & target_precision_; 187 ar & build_weight_; 188 ar & memory_weight_; 189 ar & sample_fraction_; 190 191 flann_algorithm_t index_type; 192 if (Archive::is_saving::value) { 193 index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm"); 194 } 195 ar & index_type; 196 ar & bestSearchParams_.checks; 197 198 if (Archive::is_loading::value) { 199 bestParams_["algorithm"] = index_type; 200 201 index_params_["algorithm"] = getType(); 202 index_params_["target_precision_"] = target_precision_; 203 index_params_["build_weight_"] = build_weight_; 204 index_params_["memory_weight_"] = memory_weight_; 205 index_params_["sample_fraction_"] = sample_fraction_; 206 } 207 } 208 saveIndex(FILE * stream)209 void saveIndex(FILE* stream) 210 { 211 serialization::SaveArchive sa(stream); 212 sa & *this; 213 214 bestIndex_->saveIndex(stream); 215 } 216 loadIndex(FILE * stream)217 void loadIndex(FILE* stream) 218 { 219 serialization::LoadArchive la(stream); 220 la & *this; 221 222 IndexParams params; 223 flann_algorithm_t index_type = get_param<flann_algorithm_t>(bestParams_,"algorithm"); 224 bestIndex_ = create_index_by_type<Distance>((flann_algorithm_t)index_type, dataset_, params, distance_); 225 bestIndex_->loadIndex(stream); 226 } 227 knnSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,size_t knn,const SearchParams & params)228 int knnSearch(const Matrix<ElementType>& queries, 229 Matrix<size_t>& indices, 230 Matrix<DistanceType>& dists, 231 size_t knn, 232 const SearchParams& params) const 233 { 234 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 235 return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_); 236 } 237 else { 238 return bestIndex_->knnSearch(queries, indices, dists, knn, params); 239 } 240 } 241 knnSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,size_t knn,const SearchParams & params)242 int knnSearch(const Matrix<ElementType>& queries, 243 std::vector< std::vector<size_t> >& indices, 244 std::vector<std::vector<DistanceType> >& dists, 245 size_t knn, 246 const SearchParams& params) const 247 { 248 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 249 return bestIndex_->knnSearch(queries, indices, dists, knn, bestSearchParams_); 250 } 251 else { 252 return bestIndex_->knnSearch(queries, indices, dists, knn, params); 253 } 254 255 } 256 radiusSearch(const Matrix<ElementType> & queries,Matrix<size_t> & indices,Matrix<DistanceType> & dists,DistanceType radius,const SearchParams & params)257 int radiusSearch(const Matrix<ElementType>& queries, 258 Matrix<size_t>& indices, 259 Matrix<DistanceType>& dists, 260 DistanceType radius, 261 const SearchParams& params) const 262 { 263 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 264 return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_); 265 } 266 else { 267 return bestIndex_->radiusSearch(queries, indices, dists, radius, params); 268 } 269 } 270 radiusSearch(const Matrix<ElementType> & queries,std::vector<std::vector<size_t>> & indices,std::vector<std::vector<DistanceType>> & dists,DistanceType radius,const SearchParams & params)271 int radiusSearch(const Matrix<ElementType>& queries, 272 std::vector< std::vector<size_t> >& indices, 273 std::vector<std::vector<DistanceType> >& dists, 274 DistanceType radius, 275 const SearchParams& params) const 276 { 277 if (params.checks == FLANN_CHECKS_AUTOTUNED) { 278 return bestIndex_->radiusSearch(queries, indices, dists, radius, bestSearchParams_); 279 } 280 else { 281 return bestIndex_->radiusSearch(queries, indices, dists, radius, params); 282 } 283 } 284 285 286 287 /** 288 * Method that searches for nearest-neighbors 289 */ findNeighbors(ResultSet<DistanceType> & result,const ElementType * vec,const SearchParams & searchParams)290 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const 291 { 292 // should not get here 293 assert(false); 294 } 295 getParameters()296 IndexParams getParameters() const 297 { 298 return bestParams_; 299 } 300 getSearchParameters()301 FLANN_DEPRECATED SearchParams getSearchParameters() const 302 { 303 return bestSearchParams_; 304 } 305 getSpeedup()306 FLANN_DEPRECATED float getSpeedup() const 307 { 308 return speedup_; 309 } 310 311 312 /** 313 * Number of features in this index. 314 */ size()315 size_t size() const 316 { 317 return bestIndex_->size(); 318 } 319 320 /** 321 * The length of each vector in this index. 322 */ veclen()323 size_t veclen() const 324 { 325 return bestIndex_->veclen(); 326 } 327 328 /** 329 * The amount of memory (in bytes) this index uses. 330 */ usedMemory()331 int usedMemory() const 332 { 333 return bestIndex_->usedMemory(); 334 } 335 336 /** 337 * Algorithm name 338 */ getType()339 flann_algorithm_t getType() const 340 { 341 return FLANN_INDEX_AUTOTUNED; 342 } 343 344 protected: buildIndexImpl()345 void buildIndexImpl() 346 { 347 /* nothing to do here */ 348 } 349 freeIndex()350 void freeIndex() 351 { 352 /* nothing to do here */ 353 } 354 355 private: 356 357 struct CostData 358 { 359 float searchTimeCost; 360 float buildTimeCost; 361 float memoryCost; 362 float totalCost; 363 IndexParams params; 364 }; 365 evaluate_kmeans(CostData & cost)366 void evaluate_kmeans(CostData& cost) 367 { 368 StartStopTimer t; 369 int checks; 370 const int nn = 1; 371 372 Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n", 373 get_param<int>(cost.params,"iterations"), 374 get_param<int>(cost.params,"branching")); 375 KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_); 376 // measure index build time 377 t.start(); 378 kmeans.buildIndex(); 379 t.stop(); 380 float buildTime = (float)t.value; 381 382 // measure search time 383 float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn); 384 385 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float)); 386 cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory; 387 cost.searchTimeCost = searchTime; 388 cost.buildTimeCost = buildTime; 389 Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_); 390 } 391 392 evaluate_kdtree(CostData & cost)393 void evaluate_kdtree(CostData& cost) 394 { 395 StartStopTimer t; 396 int checks; 397 const int nn = 1; 398 399 Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees")); 400 KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_); 401 402 t.start(); 403 kdtree.buildIndex(); 404 t.stop(); 405 float buildTime = (float)t.value; 406 407 //measure search time 408 float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn); 409 410 float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float)); 411 cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory; 412 cost.searchTimeCost = searchTime; 413 cost.buildTimeCost = buildTime; 414 Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime); 415 } 416 417 418 // struct KMeansSimpleDownhillFunctor { 419 // 420 // Autotune& autotuner; 421 // KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}; 422 // 423 // float operator()(int* params) { 424 // 425 // float maxFloat = numeric_limits<float>::max(); 426 // 427 // if (params[0]<2) return maxFloat; 428 // if (params[1]<0) return maxFloat; 429 // 430 // CostData c; 431 // c.params["algorithm"] = KMEANS; 432 // c.params["centers-init"] = CENTERS_RANDOM; 433 // c.params["branching"] = params[0]; 434 // c.params["max-iterations"] = params[1]; 435 // 436 // autotuner.evaluate_kmeans(c); 437 // 438 // return c.timeCost; 439 // 440 // } 441 // }; 442 // 443 // struct KDTreeSimpleDownhillFunctor { 444 // 445 // Autotune& autotuner; 446 // KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}; 447 // 448 // float operator()(int* params) { 449 // float maxFloat = numeric_limits<float>::max(); 450 // 451 // if (params[0]<1) return maxFloat; 452 // 453 // CostData c; 454 // c.params["algorithm"] = KDTREE; 455 // c.params["trees"] = params[0]; 456 // 457 // autotuner.evaluate_kdtree(c); 458 // 459 // return c.timeCost; 460 // 461 // } 462 // }; 463 464 465 optimizeKMeans(std::vector<CostData> & costs)466 void optimizeKMeans(std::vector<CostData>& costs) 467 { 468 Logger::info("KMEANS, Step 1: Exploring parameter space\n"); 469 470 // explore kmeans parameters space using combinations of the parameters below 471 int maxIterations[] = { 1, 5, 10, 15 }; 472 int branchingFactors[] = { 16, 32, 64, 128, 256 }; 473 474 int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors); 475 costs.reserve(costs.size() + kmeansParamSpaceSize); 476 477 // evaluate kmeans for all parameter combinations 478 for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) { 479 for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) { 480 CostData cost; 481 cost.params["algorithm"] = FLANN_INDEX_KMEANS; 482 cost.params["centers_init"] = FLANN_CENTERS_RANDOM; 483 cost.params["iterations"] = maxIterations[i]; 484 cost.params["branching"] = branchingFactors[j]; 485 486 evaluate_kmeans(cost); 487 costs.push_back(cost); 488 } 489 } 490 491 // Logger::info("KMEANS, Step 2: simplex-downhill optimization\n"); 492 // 493 // const int n = 2; 494 // // choose initial simplex points as the best parameters so far 495 // int kmeansNMPoints[n*(n+1)]; 496 // float kmeansVals[n+1]; 497 // for (int i=0;i<n+1;++i) { 498 // kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"]; 499 // kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"]; 500 // kmeansVals[i] = kmeansCosts[i].timeCost; 501 // } 502 // KMeansSimpleDownhillFunctor kmeans_cost_func(*this); 503 // // run optimization 504 // optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals); 505 // // store results 506 // for (int i=0;i<n+1;++i) { 507 // kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2]; 508 // kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1]; 509 // kmeansCosts[i].timeCost = kmeansVals[i]; 510 // } 511 } 512 513 optimizeKDTree(std::vector<CostData> & costs)514 void optimizeKDTree(std::vector<CostData>& costs) 515 { 516 Logger::info("KD-TREE, Step 1: Exploring parameter space\n"); 517 518 // explore kd-tree parameters space using the parameters below 519 int testTrees[] = { 1, 4, 8, 16, 32 }; 520 521 // evaluate kdtree for all parameter combinations 522 for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) { 523 CostData cost; 524 cost.params["algorithm"] = FLANN_INDEX_KDTREE; 525 cost.params["trees"] = testTrees[i]; 526 527 evaluate_kdtree(cost); 528 costs.push_back(cost); 529 } 530 531 // Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n"); 532 // 533 // const int n = 1; 534 // // choose initial simplex points as the best parameters so far 535 // int kdtreeNMPoints[n*(n+1)]; 536 // float kdtreeVals[n+1]; 537 // for (int i=0;i<n+1;++i) { 538 // kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"]; 539 // kdtreeVals[i] = kdtreeCosts[i].timeCost; 540 // } 541 // KDTreeSimpleDownhillFunctor kdtree_cost_func(*this); 542 // // run optimization 543 // optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals); 544 // // store results 545 // for (int i=0;i<n+1;++i) { 546 // kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i]; 547 // kdtreeCosts[i].timeCost = kdtreeVals[i]; 548 // } 549 } 550 551 /** 552 * Chooses the best nearest-neighbor algorithm and estimates the optimal 553 * parameters to use when building the index (for a given precision). 554 * Returns a dictionary with the optimal parameters. 555 */ estimateBuildParams()556 IndexParams estimateBuildParams() 557 { 558 std::vector<CostData> costs; 559 560 int sampleSize = int(sample_fraction_ * dataset_.rows); 561 int testSampleSize = std::min(sampleSize / 10, 1000); 562 563 Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_); 564 565 // For a very small dataset, it makes no sense to build any fancy index, just 566 // use linear search 567 if (testSampleSize < 10) { 568 Logger::info("Choosing linear, dataset too small\n"); 569 return LinearIndexParams(); 570 } 571 572 // We use a fraction of the original dataset to speedup the autotune algorithm 573 sampledDataset_ = random_sample(dataset_, sampleSize); 574 // We use a cross-validation approach, first we sample a testset from the dataset 575 testDataset_ = random_sample(sampledDataset_, testSampleSize, true); 576 577 // We compute the ground truth using linear search 578 Logger::info("Computing ground truth... \n"); 579 gt_matches_ = Matrix<size_t>(new size_t[testDataset_.rows], testDataset_.rows, 1); 580 StartStopTimer t; 581 int repeats = 0; 582 t.reset(); 583 while (t.value<0.2) { 584 repeats++; 585 t.start(); 586 compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_); 587 t.stop(); 588 } 589 590 CostData linear_cost; 591 linear_cost.searchTimeCost = (float)t.value/repeats; 592 linear_cost.buildTimeCost = 0; 593 linear_cost.memoryCost = 0; 594 linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR; 595 596 costs.push_back(linear_cost); 597 598 // Start parameter autotune process 599 Logger::info("Autotuning parameters...\n"); 600 601 optimizeKMeans(costs); 602 optimizeKDTree(costs); 603 604 float bestTimeCost = costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost; 605 for (size_t i = 0; i < costs.size(); ++i) { 606 float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost; 607 Logger::debug("Time cost: %g\n", timeCost); 608 if (timeCost < bestTimeCost) { 609 bestTimeCost = timeCost; 610 } 611 } 612 Logger::debug("Best time cost: %g\n", bestTimeCost); 613 614 IndexParams bestParams = costs[0].params; 615 if (bestTimeCost > 0) { 616 float bestCost = (costs[0].buildTimeCost * build_weight_ + costs[0].searchTimeCost) / bestTimeCost; 617 for (size_t i = 0; i < costs.size(); ++i) { 618 float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost + 619 memory_weight_ * costs[i].memoryCost; 620 Logger::debug("Cost: %g\n", crtCost); 621 if (crtCost < bestCost) { 622 bestCost = crtCost; 623 bestParams = costs[i].params; 624 } 625 } 626 Logger::debug("Best cost: %g\n", bestCost); 627 } 628 629 delete[] gt_matches_.ptr(); 630 delete[] testDataset_.ptr(); 631 delete[] sampledDataset_.ptr(); 632 633 return bestParams; 634 } 635 636 637 638 /** 639 * Estimates the search time parameters needed to get the desired precision. 640 * Precondition: the index is built 641 * Postcondition: the searchParams will have the optimum params set, also the speedup obtained over linear search. 642 */ estimateSearchParams(SearchParams & searchParams)643 float estimateSearchParams(SearchParams& searchParams) 644 { 645 const int nn = 1; 646 const size_t SAMPLE_COUNT = 1000; 647 648 assert(bestIndex_ != NULL); // must have a valid index 649 650 float speedup = 0; 651 652 int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT); 653 if (samples > 0) { 654 Matrix<ElementType> testDataset = random_sample(dataset_, samples); 655 656 Logger::info("Computing ground truth\n"); 657 658 // we need to compute the ground truth first 659 Matrix<size_t> gt_matches(new size_t[testDataset.rows], testDataset.rows, 1); 660 StartStopTimer t; 661 int repeats = 0; 662 t.reset(); 663 while (t.value<0.2) { 664 repeats++; 665 t.start(); 666 compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_); 667 t.stop(); 668 } 669 float linear = (float)t.value/repeats; 670 671 int checks; 672 Logger::info("Estimating number of checks\n"); 673 674 float searchTime; 675 float cb_index; 676 if (bestIndex_->getType() == FLANN_INDEX_KMEANS) { 677 Logger::info("KMeans algorithm, estimating cluster border factor\n"); 678 KMeansIndex<Distance>* kmeans = static_cast<KMeansIndex<Distance>*>(bestIndex_); 679 float bestSearchTime = -1; 680 float best_cb_index = -1; 681 int best_checks = -1; 682 for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) { 683 kmeans->set_cb_index(cb_index); 684 searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1); 685 if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) { 686 bestSearchTime = searchTime; 687 best_cb_index = cb_index; 688 best_checks = checks; 689 } 690 } 691 searchTime = bestSearchTime; 692 cb_index = best_cb_index; 693 checks = best_checks; 694 695 kmeans->set_cb_index(best_cb_index); 696 Logger::info("Optimum cb_index: %g\n", cb_index); 697 bestParams_["cb_index"] = cb_index; 698 } 699 else { 700 searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1); 701 } 702 703 Logger::info("Required number of checks: %d \n", checks); 704 searchParams.checks = checks; 705 706 speedup = linear / searchTime; 707 708 delete[] gt_matches.ptr(); 709 delete[] testDataset.ptr(); 710 } 711 712 return speedup; 713 } 714 715 swap(AutotunedIndex & other)716 void swap(AutotunedIndex& other) 717 { 718 BaseClass::swap(other); 719 std::swap(bestIndex_, other.bestIndex_); 720 std::swap(bestParams_, other.bestParams_); 721 std::swap(bestSearchParams_, other.bestSearchParams_); 722 std::swap(speedup_, other.speedup_); 723 std::swap(dataset_, other.dataset_); 724 std::swap(target_precision_, other.target_precision_); 725 std::swap(build_weight_, other.build_weight_); 726 std::swap(memory_weight_, other.memory_weight_); 727 std::swap(sample_fraction_, other.sample_fraction_); 728 } 729 730 private: 731 NNIndex<Distance>* bestIndex_; 732 733 IndexParams bestParams_; 734 SearchParams bestSearchParams_; 735 736 Matrix<ElementType> sampledDataset_; 737 Matrix<ElementType> testDataset_; 738 Matrix<size_t> gt_matches_; 739 740 float speedup_; 741 742 /** 743 * The dataset used by this index 744 */ 745 Matrix<ElementType> dataset_; 746 747 /** 748 * Index parameters 749 */ 750 float target_precision_; 751 float build_weight_; 752 float memory_weight_; 753 float sample_fraction_; 754 755 USING_BASECLASS_SYMBOLS 756 }; 757 } 758 759 #endif /* FLANN_AUTOTUNED_INDEX_H_ */ 760