1 2 #ifndef B3_RADIXSORT32_H 3 #define B3_RADIXSORT32_H 4 5 #include "b3OpenCLArray.h" 6 7 struct b3SortData 8 { 9 union { 10 unsigned int m_key; 11 unsigned int x; 12 }; 13 14 union { 15 unsigned int m_value; 16 unsigned int y; 17 }; 18 }; 19 #include "b3BufferInfoCL.h" 20 21 class b3RadixSort32CL 22 { 23 b3OpenCLArray<unsigned int>* m_workBuffer1; 24 b3OpenCLArray<unsigned int>* m_workBuffer2; 25 26 b3OpenCLArray<b3SortData>* m_workBuffer3; 27 b3OpenCLArray<b3SortData>* m_workBuffer4; 28 29 b3OpenCLArray<unsigned int>* m_workBuffer3a; 30 b3OpenCLArray<unsigned int>* m_workBuffer4a; 31 32 cl_command_queue m_commandQueue; 33 34 cl_kernel m_streamCountSortDataKernel; 35 cl_kernel m_streamCountKernel; 36 37 cl_kernel m_prefixScanKernel; 38 cl_kernel m_sortAndScatterSortDataKernel; 39 cl_kernel m_sortAndScatterKernel; 40 41 bool m_deviceCPU; 42 43 class b3PrefixScanCL* m_scan; 44 class b3FillCL* m_fill; 45 46 public: 47 struct b3ConstData 48 { 49 int m_n; 50 int m_nWGs; 51 int m_startBit; 52 int m_nBlocksPerWG; 53 }; 54 enum 55 { 56 DATA_ALIGNMENT = 256, 57 WG_SIZE = 64, 58 BLOCK_SIZE = 256, 59 ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE / WG_SIZE), 60 BITS_PER_PASS = 4, 61 NUM_BUCKET = (1 << BITS_PER_PASS), 62 // if you change this, change nPerWI in kernel as well 63 NUM_WGS = 20 * 6, // cypress 64 // NUM_WGS = 24*6, // cayman 65 // NUM_WGS = 32*4, // nv 66 }; 67 68 private: 69 public: 70 b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity = 0); 71 72 virtual ~b3RadixSort32CL(); 73 74 void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn, 75 b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32); 76 77 ///keys only 78 void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32); 79 80 void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32); 81 void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32); 82 void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32); 83 }; 84 #endif //B3_RADIXSORT32_H 85