1 /*
2  * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3  *
4  * This file is part of Bowtie 2.
5  *
6  * Bowtie 2 is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Bowtie 2 is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef RANDOM_UTIL_H_
21 #define RANDOM_UTIL_H_
22 
23 #include <algorithm>
24 #include "random_source.h"
25 #include "ds.h"
26 
27 /**
28  * Return a random integer in [1, N].  Each time it's called it samples again
29  * without replacement.  done() indicates when all elements have been given
30  * out.
31  */
32 class Random1toN {
33 
34 	typedef uint32_t T;
35 
36 public:
37 
38 	// A set with fewer than this many elements should kick us into swap-list
39 	// mode immediately.  Otherwise we start in seen-list mode and then
40 	// possibly proceed to swap-list mode later.
41 	static const size_t SWAPLIST_THRESH;
42 
43 	// Convert seen-list to swap-list after this many entries in the seen-list.
44 	static const size_t CONVERSION_THRESH;
45 
46 	// Convert seen-list to swap-list after this (this times n_) many entries
47 	// in the seen-list.
48 	static const float CONVERSION_FRAC;
49 
50 	Random1toN(int cat = 0) :
51 		sz_(0), n_(0), cur_(0),
52 		list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat),
53 		thresh_(0) {}
54 
55 	Random1toN(size_t n, int cat = 0) :
56 		sz_(0), n_(n), cur_(0),
57 		list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat),
58 		thresh_(0) {}
59 
60 	/**
61 	 * Initialize the set of pseudo-randoms to be given out without replacement.
62 	 */
init(size_t n,bool withoutReplacement)63 	void init(size_t n, bool withoutReplacement) {
64 		sz_ = n_ = n;
65 		converted_ = false;
66 		swaplist_ = n < SWAPLIST_THRESH || withoutReplacement;
67 		cur_ = 0;
68 		list_.clear();
69 		seen_.clear();
70 		thresh_ = std::max(CONVERSION_THRESH, (size_t)(CONVERSION_FRAC * n));
71 	}
72 
73 	/**
74 	 * Reset in preparation for giving out a fresh collection of pseudo-randoms
75 	 * without replacement.
76 	 */
reset()77 	void reset() {
78 		sz_ = n_ = cur_ = 0; swaplist_ = converted_ = false;
79 		list_.clear(); seen_.clear();
80 		thresh_ = 0;
81 	}
82 
83 	/**
84 	 * Get next pseudo-random element without replacement.
85 	 */
next(RandomSource & rnd)86 	T next(RandomSource& rnd) {
87 		assert(!done());
88 		if(cur_ == 0 && !converted_) {
89 			// This is the first call to next()
90 			if(n_ == 1) {
91 				// Trivial case: set of 1
92 				cur_ = 1;
93 				return 0;
94 			}
95 			if(swaplist_) {
96 				// The set is small, so we go immediately to the random
97 				// swapping list
98 				list_.resize(n_);
99 				for(size_t i = 0; i < n_; i++) {
100 					list_[i] = (T)i;
101 				}
102 			}
103 		}
104 		if(swaplist_) {
105 			// Get next pseudo-random using the swap-list
106 			size_t r = cur_ + (rnd.nextU32() % (n_ - cur_));
107 			if(r != cur_) {
108 				std::swap(list_[cur_], list_[r]);
109 			}
110 			return list_[cur_++];
111 		} else {
112 			assert(!converted_);
113 			// Get next pseudo-random but reject it if it's in the seen-list
114 			bool again = true;
115 			T rn = 0;
116 			size_t seenSz = seen_.size();
117 			while(again) {
118 				rn = rnd.nextU32() % (T)n_;
119 				again = false;
120 				for(size_t i = 0; i < seenSz; i++) {
121 					if(seen_[i] == rn) {
122 						again = true;
123 						break;
124 					}
125 				}
126 			}
127 			// Add it to the seen-list
128 			seen_.push_back(rn);
129 			cur_++;
130 			assert_leq(cur_, n_);
131 			// Move on to using the swap-list?
132 			assert_gt(thresh_, 0);
133 			if(seen_.size() >= thresh_ && cur_ < n_) {
134 				// Add all elements not already in the seen list to the
135 				// swap-list
136 				assert(!seen_.empty());
137 				seen_.sort();
138 				list_.resize(n_ - cur_);
139 				size_t prev = 0;
140 				size_t cur = 0;
141 				for(size_t i = 0; i <= seenSz; i++) {
142 					// Add all the elements between the previous element and
143 					// this one
144 					for(size_t j = prev; j < seen_[i]; j++) {
145 						list_[cur++] = (T)j;
146 					}
147 					prev = seen_[i]+1;
148 				}
149 				for(size_t j = prev; j < n_; j++) {
150 					list_[cur++] = (T)j;
151 				}
152 				assert_eq(cur, n_ - cur_);
153 				seen_.clear();
154 				cur_ = 0;
155 				n_ = list_.size();
156 				converted_ = true;
157 				swaplist_ = true;
158 			}
159 			return rn;
160 		}
161 	}
162 
163 	/**
164 	 * Return true iff the generator was initialized.
165 	 */
inited()166 	bool inited() const { return n_ > 0; }
167 
168 	/**
169 	 * Set so that there are no pseudo-randoms remaining.
170 	 */
setDone()171 	void setDone() { assert(inited()); cur_ = n_; }
172 
173 	/**
174 	 * Return true iff all pseudo-randoms have already been given out.
175 	 */
done()176 	bool done() const { return inited() && cur_ >= n_; }
177 
178 	/**
179 	 * Return the total number of pseudo-randoms we are initialized to give
180 	 * out, including ones already given out.
181 	 */
size()182 	size_t size() const { return n_; }
183 
184 	/**
185 	 * Return the number of pseudo-randoms left to give out.
186 	 */
left()187 	size_t left() const { return n_ - cur_; }
188 
189 	/**
190 	 * Return the total size occupued by the Descent driver and all its
191 	 * constituent parts.
192 	 */
totalSizeBytes()193 	size_t totalSizeBytes() const {
194 		return list_.totalSizeBytes() +
195 		       seen_.totalSizeBytes();
196 	}
197 
198 	/**
199 	 * Return the total capacity of the Descent driver and all its constituent
200 	 * parts.
201 	 */
totalCapacityBytes()202 	size_t totalCapacityBytes() const {
203 		return list_.totalCapacityBytes() +
204 		       seen_.totalCapacityBytes();
205 	}
206 
207 protected:
208 
209 	size_t   sz_;        // domain to pick elts from
210 	size_t   n_;         // number of elements in active list
211 	bool     swaplist_;  // if small, use swapping
212 	bool     converted_; // true iff seen-list was converted to swap-list
213 	size_t   cur_;       // # times next() was called
214 	EList<T> list_;      // pseudo-random swapping list
215 	EList<T> seen_;      // prior to swaplist_ mode, list of
216 	                     // pseudo-randoms given out
217 	size_t   thresh_;    // conversion threshold for this instantiation, which
218 	                     // depends both on CONVERSION_THRESH and on n_
219 };
220 
221 #endif
222