1 /* 2 ----------------------------------------------------------------------------- 3 This source file is part of OGRE 4 (Object-oriented Graphics Rendering Engine) 5 For the latest info, see http://www.ogre3d.org/ 6 7 Copyright (c) 2000-2013 Torus Knot Software Ltd 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 THE SOFTWARE. 26 ----------------------------------------------------------------------------- 27 */ 28 #ifndef __RadixSort_H__ 29 #define __RadixSort_H__ 30 31 #include "OgrePrerequisites.h" 32 33 namespace Ogre { 34 35 /** \addtogroup Core 36 * @{ 37 */ 38 /** \addtogroup General 39 * @{ 40 */ 41 /** Class for performing a radix sort (fast comparison-less sort based on 42 byte value) on various standard STL containers. 43 @remarks 44 A radix sort is a very fast sort algorithm. It doesn't use comparisons 45 and thus is able to break the theoretical minimum O(N*logN) complexity. 46 Radix sort is complexity O(k*N), where k is a constant. Note that radix 47 sorting is not in-place, it requires additional storage, so it trades 48 memory for speed. The overhead of copying means that it is only faster 49 for fairly large datasets, so you are advised to only use it for collections 50 of at least a few hundred items. 51 @par 52 This is a template class to allow it to deal with a variety of containers, 53 and a variety of value types to sort on. In addition to providing the 54 container and value type on construction, you also need to supply a 55 functor object which will retrieve the value to compare on for each item 56 in the list. For example, if you had an std::vector of by-value instances 57 of an object of class 'Bibble', and you wanted to sort on 58 Bibble::getDoobrie(), you'd have to firstly create a functor 59 like this: 60 @code 61 struct BibbleSortFunctor 62 { 63 float operator()(const Bibble& val) const 64 { 65 return val.getDoobrie(); 66 } 67 } 68 @endcode 69 Then, you need to declare a RadixSort class which names the container type, 70 the value type in the container, and the type of the value you want to 71 sort by. You can then call the sort function. E.g. 72 @code 73 RadixSort<BibbleList, Bibble, float> radixSorter; 74 BibbleSortFunctor functor; 75 76 radixSorter.sort(myBibbleList, functor); 77 @endcode 78 You should try to reuse RadixSort instances, since repeated allocation of the 79 internal storage is then avoided. 80 @note 81 Radix sorting is often associated with just unsigned integer values. Our 82 implementation can handle both unsigned and signed integers, as well as 83 floats (which are often not supported by other radix sorters). doubles 84 are not supported; you will need to implement your functor object to convert 85 to float if you wish to use this sort routine. 86 */ 87 template <class TContainer, class TContainerValueType, typename TCompValueType> 88 class RadixSort 89 { 90 public: 91 typedef typename TContainer::iterator ContainerIter; 92 protected: 93 /// Alpha-pass counters of values (histogram) 94 /// 4 of them so we can radix sort a maximum of a 32bit value 95 int mCounters[4][256]; 96 /// Beta-pass offsets 97 int mOffsets[256]; 98 /// Sort area size 99 int mSortSize; 100 /// Number of passes for this type 101 int mNumPasses; 102 103 struct SortEntry 104 { 105 TCompValueType key; 106 ContainerIter iter; SortEntrySortEntry107 SortEntry() {} SortEntrySortEntry108 SortEntry(TCompValueType k, ContainerIter it) 109 : key(k), iter(it) {} 110 111 }; 112 /// Temp sort storage 113 typedef std::vector<SortEntry, STLAllocator<SortEntry, GeneralAllocPolicy> > SortVector; 114 SortVector mSortArea1; 115 SortVector mSortArea2; 116 SortVector* mSrc; 117 SortVector* mDest; 118 TContainer mTmpContainer; // initial copy 119 120 sortPass(int byteIndex)121 void sortPass(int byteIndex) 122 { 123 // Calculate offsets 124 // Basically this just leaves gaps for duplicate entries to fill 125 mOffsets[0] = 0; 126 for (int i = 1; i < 256; ++i) 127 { 128 mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; 129 } 130 131 // Sort pass 132 for (int i = 0; i < mSortSize; ++i) 133 { 134 unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key); 135 (*mDest)[mOffsets[byteVal]++] = (*mSrc)[i]; 136 } 137 138 } 139 template <typename T> finalPass(int byteIndex,T val)140 void finalPass(int byteIndex, T val) 141 { 142 // default is to do normal pass 143 sortPass(byteIndex); 144 } 145 146 // special case signed int finalPass(int byteIndex,int val)147 void finalPass(int byteIndex, int val) 148 { 149 int numNeg = 0; 150 // all negative values are in entries 128+ in most significant byte 151 for (int i = 128; i < 256; ++i) 152 { 153 numNeg += mCounters[byteIndex][i]; 154 } 155 // Calculate offsets - positive ones start at the number of negatives 156 // do positive numbers 157 mOffsets[0] = numNeg; 158 for (int i = 1; i < 128; ++i) 159 { 160 mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; 161 } 162 // Do negative numbers (must start at zero) 163 // No need to invert ordering, already correct (-1 is highest number) 164 mOffsets[128] = 0; 165 for (int i = 129; i < 256; ++i) 166 { 167 mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; 168 } 169 170 // Sort pass 171 for (int i = 0; i < mSortSize; ++i) 172 { 173 unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key); 174 (*mDest)[mOffsets[byteVal]++] = (*mSrc)[i]; 175 } 176 } 177 178 179 // special case float finalPass(int byteIndex,float val)180 void finalPass(int byteIndex, float val) 181 { 182 // floats need to be special cased since negative numbers will come 183 // after positives (high bit = sign) and will be in reverse order 184 // (no ones-complement of the +ve value) 185 int numNeg = 0; 186 // all negative values are in entries 128+ in most significant byte 187 for (int i = 128; i < 256; ++i) 188 { 189 numNeg += mCounters[byteIndex][i]; 190 } 191 // Calculate offsets - positive ones start at the number of negatives 192 // do positive numbers normally 193 mOffsets[0] = numNeg; 194 for (int i = 1; i < 128; ++i) 195 { 196 mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; 197 } 198 // Do negative numbers (must start at zero) 199 // Also need to invert ordering 200 // In order to preserve the stability of the sort (essential since 201 // we rely on previous bytes already being sorted) we have to count 202 // backwards in our offsets from 203 mOffsets[255] = mCounters[byteIndex][255]; 204 for (int i = 254; i > 127; --i) 205 { 206 mOffsets[i] = mOffsets[i+1] + mCounters[byteIndex][i]; 207 } 208 209 // Sort pass 210 for (int i = 0; i < mSortSize; ++i) 211 { 212 unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key); 213 if (byteVal > 127) 214 { 215 // -ve; pre-decrement since offsets set to count 216 (*mDest)[--mOffsets[byteVal]] = (*mSrc)[i]; 217 } 218 else 219 { 220 // +ve 221 (*mDest)[mOffsets[byteVal]++] = (*mSrc)[i]; 222 } 223 } 224 } 225 getByte(int byteIndex,TCompValueType val)226 inline unsigned char getByte(int byteIndex, TCompValueType val) 227 { 228 #if OGRE_ENDIAN == OGRE_ENDIAN_LITTLE 229 return ((unsigned char*)(&val))[byteIndex]; 230 #else 231 return ((unsigned char*)(&val))[mNumPasses - byteIndex - 1]; 232 #endif 233 } 234 235 public: 236 RadixSort()237 RadixSort() {} ~RadixSort()238 ~RadixSort() {} 239 240 /** Main sort function 241 @param container A container of the type you declared when declaring 242 @param func A functor which returns the value for comparison when given 243 a container value 244 */ 245 template <class TFunction> sort(TContainer & container,TFunction func)246 void sort(TContainer& container, TFunction func) 247 { 248 if (container.empty()) 249 return; 250 251 // Set up the sort areas 252 mSortSize = static_cast<int>(container.size()); 253 mSortArea1.resize(container.size()); 254 mSortArea2.resize(container.size()); 255 256 // Copy data now (we need constant iterators for sorting) 257 mTmpContainer = container; 258 259 mNumPasses = sizeof(TCompValueType); 260 261 // Counter pass 262 // Initialise the counts 263 int p; 264 for (p = 0; p < mNumPasses; ++p) 265 memset(mCounters[p], 0, sizeof(int) * 256); 266 267 // Perform alpha pass to count 268 ContainerIter i = mTmpContainer.begin(); 269 TCompValueType prevValue = func.operator()(*i); 270 bool needsSorting = false; 271 for (int u = 0; i != mTmpContainer.end(); ++i, ++u) 272 { 273 // get sort value 274 TCompValueType val = func.operator()(*i); 275 // cheap check to see if needs sorting (temporal coherence) 276 if (!needsSorting && val < prevValue) 277 needsSorting = true; 278 279 // Create a sort entry 280 mSortArea1[u].key = val; 281 mSortArea1[u].iter = i; 282 283 // increase counters 284 for (p = 0; p < mNumPasses; ++p) 285 { 286 unsigned char byteVal = getByte(p, val); 287 mCounters[p][byteVal]++; 288 } 289 290 prevValue = val; 291 292 } 293 294 // early exit if already sorted 295 if (!needsSorting) 296 return; 297 298 299 // Sort passes 300 mSrc = &mSortArea1; 301 mDest = &mSortArea2; 302 303 for (p = 0; p < mNumPasses - 1; ++p) 304 { 305 sortPass(p); 306 // flip src/dst 307 SortVector* tmp = mSrc; 308 mSrc = mDest; 309 mDest = tmp; 310 } 311 // Final pass may differ, make polymorphic 312 finalPass(p, prevValue); 313 314 // Copy everything back 315 int c = 0; 316 for (i = container.begin(); 317 i != container.end(); ++i, ++c) 318 { 319 *i = *((*mDest)[c].iter); 320 } 321 } 322 323 }; 324 325 /** @} */ 326 /** @} */ 327 328 } 329 #endif 330 331