1 /* 2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu> 3 * 4 * This file is part of Bowtie 2. 5 * 6 * Bowtie 2 is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * Bowtie 2 is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #ifndef RANDOM_UTIL_H_ 21 #define RANDOM_UTIL_H_ 22 23 #include <algorithm> 24 #include "random_source.h" 25 #include "ds.h" 26 27 /** 28 * Return a random integer in [1, N]. Each time it's called it samples again 29 * without replacement. done() indicates when all elements have been given 30 * out. 31 */ 32 class Random1toN { 33 34 typedef uint32_t T; 35 36 public: 37 38 // A set with fewer than this many elements should kick us into swap-list 39 // mode immediately. Otherwise we start in seen-list mode and then 40 // possibly proceed to swap-list mode later. 41 static const size_t SWAPLIST_THRESH; 42 43 // Convert seen-list to swap-list after this many entries in the seen-list. 44 static const size_t CONVERSION_THRESH; 45 46 // Convert seen-list to swap-list after this (this times n_) many entries 47 // in the seen-list. 48 static const float CONVERSION_FRAC; 49 50 Random1toN(int cat = 0) : 51 sz_(0), n_(0), cur_(0), 52 list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat), 53 thresh_(0) {} 54 55 Random1toN(size_t n, int cat = 0) : 56 sz_(0), n_(n), cur_(0), 57 list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat), 58 thresh_(0) {} 59 60 /** 61 * Initialize the set of pseudo-randoms to be given out without replacement. 62 */ init(size_t n,bool withoutReplacement)63 void init(size_t n, bool withoutReplacement) { 64 sz_ = n_ = n; 65 converted_ = false; 66 swaplist_ = n < SWAPLIST_THRESH || withoutReplacement; 67 cur_ = 0; 68 list_.clear(); 69 seen_.clear(); 70 thresh_ = std::max(CONVERSION_THRESH, (size_t)(CONVERSION_FRAC * n)); 71 } 72 73 /** 74 * Reset in preparation for giving out a fresh collection of pseudo-randoms 75 * without replacement. 76 */ reset()77 void reset() { 78 sz_ = n_ = cur_ = 0; swaplist_ = converted_ = false; 79 list_.clear(); seen_.clear(); 80 thresh_ = 0; 81 } 82 83 /** 84 * Get next pseudo-random element without replacement. 85 */ next(RandomSource & rnd)86 T next(RandomSource& rnd) { 87 assert(!done()); 88 if(cur_ == 0 && !converted_) { 89 // This is the first call to next() 90 if(n_ == 1) { 91 // Trivial case: set of 1 92 cur_ = 1; 93 return 0; 94 } 95 if(swaplist_) { 96 // The set is small, so we go immediately to the random 97 // swapping list 98 list_.resize(n_); 99 for(size_t i = 0; i < n_; i++) { 100 list_[i] = (T)i; 101 } 102 } 103 } 104 if(swaplist_) { 105 // Get next pseudo-random using the swap-list 106 size_t r = cur_ + (rnd.nextU32() % (n_ - cur_)); 107 if(r != cur_) { 108 std::swap(list_[cur_], list_[r]); 109 } 110 return list_[cur_++]; 111 } else { 112 assert(!converted_); 113 // Get next pseudo-random but reject it if it's in the seen-list 114 bool again = true; 115 T rn = 0; 116 size_t seenSz = seen_.size(); 117 while(again) { 118 rn = rnd.nextU32() % (T)n_; 119 again = false; 120 for(size_t i = 0; i < seenSz; i++) { 121 if(seen_[i] == rn) { 122 again = true; 123 break; 124 } 125 } 126 } 127 // Add it to the seen-list 128 seen_.push_back(rn); 129 cur_++; 130 assert_leq(cur_, n_); 131 // Move on to using the swap-list? 132 assert_gt(thresh_, 0); 133 if(seen_.size() >= thresh_ && cur_ < n_) { 134 // Add all elements not already in the seen list to the 135 // swap-list 136 assert(!seen_.empty()); 137 seen_.sort(); 138 list_.resize(n_ - cur_); 139 size_t prev = 0; 140 size_t cur = 0; 141 for(size_t i = 0; i <= seenSz; i++) { 142 // Add all the elements between the previous element and 143 // this one 144 for(size_t j = prev; j < seen_[i]; j++) { 145 list_[cur++] = (T)j; 146 } 147 prev = seen_[i]+1; 148 } 149 for(size_t j = prev; j < n_; j++) { 150 list_[cur++] = (T)j; 151 } 152 assert_eq(cur, n_ - cur_); 153 seen_.clear(); 154 cur_ = 0; 155 n_ = list_.size(); 156 converted_ = true; 157 swaplist_ = true; 158 } 159 return rn; 160 } 161 } 162 163 /** 164 * Return true iff the generator was initialized. 165 */ inited()166 bool inited() const { return n_ > 0; } 167 168 /** 169 * Set so that there are no pseudo-randoms remaining. 170 */ setDone()171 void setDone() { assert(inited()); cur_ = n_; } 172 173 /** 174 * Return true iff all pseudo-randoms have already been given out. 175 */ done()176 bool done() const { return inited() && cur_ >= n_; } 177 178 /** 179 * Return the total number of pseudo-randoms we are initialized to give 180 * out, including ones already given out. 181 */ size()182 size_t size() const { return n_; } 183 184 /** 185 * Return the number of pseudo-randoms left to give out. 186 */ left()187 size_t left() const { return n_ - cur_; } 188 189 /** 190 * Return the total size occupued by the Descent driver and all its 191 * constituent parts. 192 */ totalSizeBytes()193 size_t totalSizeBytes() const { 194 return list_.totalSizeBytes() + 195 seen_.totalSizeBytes(); 196 } 197 198 /** 199 * Return the total capacity of the Descent driver and all its constituent 200 * parts. 201 */ totalCapacityBytes()202 size_t totalCapacityBytes() const { 203 return list_.totalCapacityBytes() + 204 seen_.totalCapacityBytes(); 205 } 206 207 protected: 208 209 size_t sz_; // domain to pick elts from 210 size_t n_; // number of elements in active list 211 bool swaplist_; // if small, use swapping 212 bool converted_; // true iff seen-list was converted to swap-list 213 size_t cur_; // # times next() was called 214 EList<T> list_; // pseudo-random swapping list 215 EList<T> seen_; // prior to swaplist_ mode, list of 216 // pseudo-randoms given out 217 size_t thresh_; // conversion threshold for this instantiation, which 218 // depends both on CONVERSION_THRESH and on n_ 219 }; 220 221 #endif 222