1 // This example shows how to sort structs using complex multiple part keys using
2 // string_sort.
3 //
4 //  Copyright Steven Ross 2009-2014.
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 //    (See accompanying file LICENSE_1_0.txt or copy at
8 //          http://www.boost.org/LICENSE_1_0.txt)
9 
10 //  See http://www.boost.org/libs/sort for library home page.
11 
12 #include <boost/sort/spreadsort/string_sort.hpp>
13 #include <boost/sort/spreadsort/float_sort.hpp>
14 #include <time.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <algorithm>
18 #include <vector>
19 #include <iostream>
20 #include <fstream>
21 #include <string>
22 using std::string;
23 using namespace boost::sort::spreadsort;
24 
25 //[generalized_functors
26 struct DATA_TYPE {
27   time_t birth;
28   float net_worth;
29   string first_name;
30   string last_name;
31 };
32 
33 static const int birth_size = sizeof(time_t);
34 static const int first_name_offset = birth_size + sizeof(float);
35 static const boost::uint64_t base_mask = 0xff;
36 
37 struct lessthan {
operator ()lessthan38   inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {
39     if (x.birth != y.birth) {
40       return x.birth < y.birth;
41     }
42     if (x.net_worth != y.net_worth) {
43       return x.net_worth < y.net_worth;
44     }
45     if (x.first_name != y.first_name) {
46       return x.first_name < y.first_name;
47     }
48     return x.last_name < y.last_name;
49   }
50 };
51 
52 struct bracket {
operator ()bracket53   inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {
54     // Sort date as a signed int, returning the appropriate byte.
55     if (offset < birth_size) {
56       const int bit_shift = 8 * (birth_size - offset - 1);
57       unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;
58       // Handling the sign bit.  Unnecessary if the data is always positive.
59       if (offset == 0) {
60         return result ^ 128;
61       }
62 
63       return result;
64     }
65 
66     // Sort a signed float.  This requires reversing the order of negatives
67     // because of the way floats are represented in bits.
68     if (offset < first_name_offset) {
69       const int bit_shift = 8 * (first_name_offset - offset - 1);
70       unsigned key = float_mem_cast<float, unsigned>(x.net_worth);
71       unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;
72       // Handling the sign.
73       if (x.net_worth < 0) {
74         return 255 - result;
75       }
76       // Increasing positives so they are higher than negatives.
77       if (offset == birth_size) {
78         return 128 + result;
79       }
80 
81       return result;
82     }
83 
84     // Sort a string that is before the end.  This approach supports embedded
85     // nulls.  If embedded nulls are not required, then just delete the "* 2"
86     // and the inside of the following if just becomes:
87     // return x.first_name[offset - first_name_offset];
88     const unsigned first_name_end_offset =
89       first_name_offset + x.first_name.size() * 2;
90     if (offset < first_name_end_offset) {
91       int char_offset = offset - first_name_offset;
92       // This signals that the string continues.
93       if (!(char_offset & 1)) {
94         return 1;
95       }
96       return x.first_name[char_offset >> 1];
97     }
98 
99     // This signals that the string has ended, so that shorter strings come
100     // before longer ones.
101     if (offset == first_name_end_offset) {
102       return 0;
103     }
104 
105     // The final string needs no special consideration.
106     return x.last_name[offset - first_name_end_offset - 1];
107   }
108 };
109 
110 struct getsize {
operator ()getsize111   inline size_t operator()(const DATA_TYPE &x) const {
112     return first_name_offset + x.first_name.size() * 2 + 1 +
113       x.last_name.size();
114   }
115 };
116 //] [/generalized_functors]
117 
118 //Pass in an argument to test std::sort
main(int argc,const char ** argv)119 int main(int argc, const char ** argv) {
120   std::ifstream indata;
121   std::ofstream outfile;
122   bool stdSort = false;
123   unsigned loopCount = 1;
124   for (int u = 1; u < argc; ++u) {
125     if (std::string(argv[u]) == "-std")
126       stdSort = true;
127     else
128       loopCount = atoi(argv[u]);
129   }
130   double total = 0.0;
131   //Run multiple loops, if requested
132   std::vector<DATA_TYPE> array;
133   for (unsigned u = 0; u < loopCount; ++u) {
134     indata.open("input.txt", std::ios_base::in | std::ios_base::binary);
135     if (indata.bad()) {
136       printf("input.txt could not be opened\n");
137       return 1;
138     }
139 
140     // Read in the data.
141     DATA_TYPE inval;
142     while (!indata.eof() ) {
143       indata >> inval.first_name;
144       indata >> inval.last_name;
145       indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);
146       indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));
147       // Handling nan.
148       if (inval.net_worth != inval.net_worth) {
149         inval.net_worth = 0;
150       }
151       if (indata.eof())
152         break;
153       array.push_back(inval);
154     }
155     indata.close();
156 
157     // Sort the data.
158     clock_t start, end;
159     double elapsed;
160     start = clock();
161     if (stdSort) {
162       std::sort(array.begin(), array.end(), lessthan());
163     } else {
164 //[generalized_functors_call
165       string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());
166 //] [/generalized_functors_call]
167     }
168     end = clock();
169     elapsed = static_cast<double>(end - start);
170     if (stdSort) {
171       outfile.open("standard_sort_out.txt", std::ios_base::out |
172                    std::ios_base::binary | std::ios_base::trunc);
173     } else {
174       outfile.open("boost_sort_out.txt", std::ios_base::out |
175                    std::ios_base::binary | std::ios_base::trunc);
176     }
177     if (outfile.good()) {
178       for (unsigned u = 0; u < array.size(); ++u)
179         outfile << array[u].birth << " " << array[u].net_worth << " "
180                 << array[u].first_name << " " << array[u].last_name << "\n";
181       outfile.close();
182     }
183     total += elapsed;
184     array.clear();
185   }
186   if (stdSort) {
187     printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);
188   } else {
189     printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);
190   }
191   return 0;
192 }
193