1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4     (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2013 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #ifndef __RadixSort_H__
29 #define __RadixSort_H__
30 
31 #include "OgrePrerequisites.h"
32 
33 namespace Ogre {
34 
35 	/** \addtogroup Core
36 	*  @{
37 	*/
38 	/** \addtogroup General
39 	*  @{
40 	*/
41 	/** Class for performing a radix sort (fast comparison-less sort based on
42 		byte value) on various standard STL containers.
43 	@remarks
44 		A radix sort is a very fast sort algorithm. It doesn't use comparisons
45 		and thus is able to break the theoretical minimum O(N*logN) complexity.
46 		Radix sort is complexity O(k*N), where k is a constant. Note that radix
47 		sorting is not in-place, it requires additional storage, so it trades
48 		memory for speed. The overhead of copying means that it is only faster
49 		for fairly large datasets, so you are advised to only use it for collections
50 		of at least a few hundred items.
51 	@par
52 		This is a template class to allow it to deal with a variety of containers,
53 		and a variety of value types to sort on. In addition to providing the
54 		container and value type on construction, you also need to supply a
55 		functor object which will retrieve the value to compare on for each item
56 		in the list. For example, if you had an std::vector of by-value instances
57 		of an object of class 'Bibble', and you wanted to sort on
58 		Bibble::getDoobrie(), you'd have to firstly create a functor
59 		like this:
60 	@code
61 		struct BibbleSortFunctor
62 		{
63 			float operator()(const Bibble& val) const
64 			{
65 				return val.getDoobrie();
66 			}
67 		}
68 	@endcode
69 		Then, you need to declare a RadixSort class which names the container type,
70 		the value type in the container, and the type of the value you want to
71 		sort by. You can then call the sort function. E.g.
72 	@code
73 		RadixSort<BibbleList, Bibble, float> radixSorter;
74 		BibbleSortFunctor functor;
75 
76 		radixSorter.sort(myBibbleList, functor);
77 	@endcode
78 		You should try to reuse RadixSort instances, since repeated allocation of the
79 		internal storage is then avoided.
80 	@note
81 		Radix sorting is often associated with just unsigned integer values. Our
82 		implementation can handle both unsigned and signed integers, as well as
83 		floats (which are often not supported by other radix sorters). doubles
84 		are not supported; you will need to implement your functor object to convert
85 		to float if you wish to use this sort routine.
86 	*/
87 	template <class TContainer, class TContainerValueType, typename TCompValueType>
88 	class RadixSort
89 	{
90 	public:
91 		typedef typename TContainer::iterator ContainerIter;
92 	protected:
93 		/// Alpha-pass counters of values (histogram)
94 		/// 4 of them so we can radix sort a maximum of a 32bit value
95 		int mCounters[4][256];
96 		/// Beta-pass offsets
97 		int mOffsets[256];
98 		/// Sort area size
99 		int mSortSize;
100 		/// Number of passes for this type
101 		int mNumPasses;
102 
103 		struct SortEntry
104 		{
105 			TCompValueType key;
106 			ContainerIter iter;
SortEntrySortEntry107 			SortEntry() {}
SortEntrySortEntry108 			SortEntry(TCompValueType k, ContainerIter it)
109 				: key(k), iter(it) {}
110 
111 		};
112 		/// Temp sort storage
113 		typedef std::vector<SortEntry, STLAllocator<SortEntry, GeneralAllocPolicy> > SortVector;
114 		SortVector mSortArea1;
115 		SortVector mSortArea2;
116 		SortVector* mSrc;
117 		SortVector* mDest;
118 		TContainer mTmpContainer; // initial copy
119 
120 
sortPass(int byteIndex)121 		void sortPass(int byteIndex)
122 		{
123 			// Calculate offsets
124 			// Basically this just leaves gaps for duplicate entries to fill
125 			mOffsets[0] = 0;
126 			for (int i = 1; i < 256; ++i)
127 			{
128 				mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1];
129 			}
130 
131 			// Sort pass
132 			for (int i = 0; i < mSortSize; ++i)
133 			{
134 				unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key);
135 				(*mDest)[mOffsets[byteVal]++] = (*mSrc)[i];
136 			}
137 
138 		}
139 		template <typename T>
finalPass(int byteIndex,T val)140 		void finalPass(int byteIndex, T val)
141 		{
142 			// default is to do normal pass
143 			sortPass(byteIndex);
144 		}
145 
146 		// special case signed int
finalPass(int byteIndex,int val)147 		void finalPass(int byteIndex, int val)
148 		{
149 			int numNeg = 0;
150 			// all negative values are in entries 128+ in most significant byte
151 			for (int i = 128; i < 256; ++i)
152 			{
153 				numNeg += mCounters[byteIndex][i];
154 			}
155 			// Calculate offsets - positive ones start at the number of negatives
156 			// do positive numbers
157 			mOffsets[0] = numNeg;
158 			for (int i = 1; i < 128; ++i)
159 			{
160 				mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1];
161 			}
162 			// Do negative numbers (must start at zero)
163 			// No need to invert ordering, already correct (-1 is highest number)
164 			mOffsets[128] = 0;
165 			for (int i = 129; i < 256; ++i)
166 			{
167 				mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1];
168 			}
169 
170 			// Sort pass
171 			for (int i = 0; i < mSortSize; ++i)
172 			{
173 				unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key);
174 				(*mDest)[mOffsets[byteVal]++] = (*mSrc)[i];
175 			}
176 		}
177 
178 
179 		// special case float
finalPass(int byteIndex,float val)180 		void finalPass(int byteIndex, float val)
181 		{
182 			// floats need to be special cased since negative numbers will come
183 			// after positives (high bit = sign) and will be in reverse order
184 			// (no ones-complement of the +ve value)
185 			int numNeg = 0;
186 			// all negative values are in entries 128+ in most significant byte
187 			for (int i = 128; i < 256; ++i)
188 			{
189 				numNeg += mCounters[byteIndex][i];
190 			}
191 			// Calculate offsets - positive ones start at the number of negatives
192 			// do positive numbers normally
193 			mOffsets[0] = numNeg;
194 			for (int i = 1; i < 128; ++i)
195 			{
196 				mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1];
197 			}
198 			// Do negative numbers (must start at zero)
199 			// Also need to invert ordering
200 			// In order to preserve the stability of the sort (essential since
201 			// we rely on previous bytes already being sorted) we have to count
202 			// backwards in our offsets from
203 			mOffsets[255] = mCounters[byteIndex][255];
204 			for (int i = 254; i > 127; --i)
205 			{
206 				mOffsets[i] = mOffsets[i+1] + mCounters[byteIndex][i];
207 			}
208 
209 			// Sort pass
210 			for (int i = 0; i < mSortSize; ++i)
211 			{
212 				unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key);
213 				if (byteVal > 127)
214 				{
215 					// -ve; pre-decrement since offsets set to count
216 					(*mDest)[--mOffsets[byteVal]] = (*mSrc)[i];
217 				}
218 				else
219 				{
220 					// +ve
221 					(*mDest)[mOffsets[byteVal]++] = (*mSrc)[i];
222 				}
223 			}
224 		}
225 
getByte(int byteIndex,TCompValueType val)226 		inline unsigned char getByte(int byteIndex, TCompValueType val)
227 		{
228 #if OGRE_ENDIAN == OGRE_ENDIAN_LITTLE
229 			return ((unsigned char*)(&val))[byteIndex];
230 #else
231 			return ((unsigned char*)(&val))[mNumPasses - byteIndex - 1];
232 #endif
233 		}
234 
235 	public:
236 
RadixSort()237 		RadixSort() {}
~RadixSort()238 		~RadixSort() {}
239 
240 		/** Main sort function
241 		@param container A container of the type you declared when declaring
242 		@param func A functor which returns the value for comparison when given
243 			a container value
244 		*/
245 		template <class TFunction>
sort(TContainer & container,TFunction func)246 		void sort(TContainer& container, TFunction func)
247 		{
248 			if (container.empty())
249 				return;
250 
251 			// Set up the sort areas
252 			mSortSize = static_cast<int>(container.size());
253 			mSortArea1.resize(container.size());
254 			mSortArea2.resize(container.size());
255 
256 			// Copy data now (we need constant iterators for sorting)
257 			mTmpContainer = container;
258 
259 			mNumPasses = sizeof(TCompValueType);
260 
261 			// Counter pass
262 			// Initialise the counts
263 			int p;
264 			for (p = 0; p < mNumPasses; ++p)
265 				memset(mCounters[p], 0, sizeof(int) * 256);
266 
267 			// Perform alpha pass to count
268 			ContainerIter i = mTmpContainer.begin();
269 			TCompValueType prevValue = func.operator()(*i);
270 			bool needsSorting = false;
271 			for (int u = 0; i != mTmpContainer.end(); ++i, ++u)
272 			{
273 				// get sort value
274 				TCompValueType val = func.operator()(*i);
275 				// cheap check to see if needs sorting (temporal coherence)
276 				if (!needsSorting && val < prevValue)
277 					needsSorting = true;
278 
279 				// Create a sort entry
280 				mSortArea1[u].key = val;
281 				mSortArea1[u].iter = i;
282 
283 				// increase counters
284 				for (p = 0; p < mNumPasses; ++p)
285 				{
286 					unsigned char byteVal = getByte(p, val);
287 					mCounters[p][byteVal]++;
288 				}
289 
290 				prevValue = val;
291 
292 			}
293 
294 			// early exit if already sorted
295 			if (!needsSorting)
296 				return;
297 
298 
299 			// Sort passes
300 			mSrc = &mSortArea1;
301 			mDest = &mSortArea2;
302 
303 			for (p = 0; p < mNumPasses - 1; ++p)
304 			{
305 				sortPass(p);
306 				// flip src/dst
307 				SortVector* tmp = mSrc;
308 				mSrc = mDest;
309 				mDest = tmp;
310 			}
311 			// Final pass may differ, make polymorphic
312 			finalPass(p, prevValue);
313 
314 			// Copy everything back
315 			int c = 0;
316 			for (i = container.begin();
317 				i != container.end(); ++i, ++c)
318 			{
319 				*i = *((*mDest)[c].iter);
320 			}
321 		}
322 
323 	};
324 
325 	/** @} */
326 	/** @} */
327 
328 }
329 #endif
330 
331