1 /*******************************************************************************
2 * tlx/sort/strings.hpp
3 *
4 * Front-end for string sorting algorithms.
5 *
6 * Part of tlx - http://panthema.net/tlx
7 *
8 * Copyright (C) 2018 Timo Bingmann <tb@panthema.net>
9 *
10 * All rights reserved. Published under the Boost Software License, Version 1.0
11 ******************************************************************************/
12
13 #ifndef TLX_SORT_STRINGS_HEADER
14 #define TLX_SORT_STRINGS_HEADER
15
16 #include <tlx/sort/strings/insertion_sort.hpp>
17 #include <tlx/sort/strings/multikey_quicksort.hpp>
18 #include <tlx/sort/strings/radix_sort.hpp>
19
20 #include <cstdint>
21 #include <string>
22 #include <vector>
23
24 namespace tlx {
25
26 //! \addtogroup tlx_sort
27 //! \{
28 //! \name String Sorting Algorithms
29 //! \{
30
31 /******************************************************************************/
32
33 /*!
34 * Sort a set of strings represented by C-style uint8_t* in place.
35 *
36 * If the memory limit is non zero, possibly slower algorithms will be selected
37 * to stay within the memory limit.
38 */
39 static inline
sort_strings(unsigned char ** strings,size_t size,size_t memory=0)40 void sort_strings(unsigned char** strings, size_t size, size_t memory = 0) {
41 sort_strings_detail::radixsort_CE3(
42 sort_strings_detail::StringPtr<sort_strings_detail::UCharStringSet>(
43 sort_strings_detail::UCharStringSet(strings, strings + size)),
44 /* depth */ 0, memory);
45 }
46
47 /*!
48 * Sort a set of strings represented by C-style char* in place.
49 *
50 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
51 * If the memory limit is non zero, possibly slower algorithms will be selected
52 * to stay within the memory limit.
53 */
54 static inline
sort_strings(char ** strings,size_t size,size_t memory=0)55 void sort_strings(char** strings, size_t size, size_t memory = 0) {
56 return sort_strings(
57 reinterpret_cast<unsigned char**>(strings), size, memory);
58 }
59
60 /*!
61 * Sort a set of strings represented by C-style uint8_t* in place.
62 *
63 * If the memory limit is non zero, possibly slower algorithms will be selected
64 * to stay within the memory limit.
65 */
66 static inline
sort_strings(const unsigned char ** strings,size_t size,size_t memory=0)67 void sort_strings(const unsigned char** strings, size_t size,
68 size_t memory = 0) {
69 sort_strings_detail::radixsort_CE3(
70 sort_strings_detail::StringPtr<sort_strings_detail::CUCharStringSet>(
71 sort_strings_detail::CUCharStringSet(strings, strings + size)),
72 /* depth */ 0, memory);
73 }
74
75 /*!
76 * Sort a set of strings represented by C-style char* in place.
77 *
78 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
79 * If the memory limit is non zero, possibly slower algorithms will be selected
80 * to stay within the memory limit.
81 */
82 static inline
sort_strings(const char ** strings,size_t size,size_t memory=0)83 void sort_strings(const char** strings, size_t size, size_t memory = 0) {
84 return sort_strings(
85 reinterpret_cast<const unsigned char**>(strings), size, memory);
86 }
87
88 /******************************************************************************/
89
90 /*!
91 * Sort a set of strings represented by C-style char* in place.
92 *
93 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
94 * If the memory limit is non zero, possibly slower algorithms will be selected
95 * to stay within the memory limit.
96 */
97 static inline
sort_strings(std::vector<char * > & strings,size_t memory=0)98 void sort_strings(std::vector<char*>& strings, size_t memory = 0) {
99 return sort_strings(strings.data(), strings.size(), memory);
100 }
101
102 /*!
103 * Sort a set of strings represented by C-style uint8_t* in place.
104 *
105 * If the memory limit is non zero, possibly slower algorithms will be selected
106 * to stay within the memory limit.
107 */
108 static inline
sort_strings(std::vector<unsigned char * > & strings,size_t memory=0)109 void sort_strings(std::vector<unsigned char*>& strings, size_t memory = 0) {
110 return sort_strings(strings.data(), strings.size(), memory);
111 }
112
113 /*!
114 * Sort a set of strings represented by C-style char* in place.
115 *
116 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
117 * If the memory limit is non zero, possibly slower algorithms will be selected
118 * to stay within the memory limit.
119 */
120 static inline
sort_strings(std::vector<const char * > & strings,size_t memory=0)121 void sort_strings(std::vector<const char*>& strings, size_t memory = 0) {
122 return sort_strings(strings.data(), strings.size(), memory);
123 }
124
125 /*!
126 * Sort a set of strings represented by C-style uint8_t* in place.
127 *
128 * If the memory limit is non zero, possibly slower algorithms will be selected
129 * to stay within the memory limit.
130 */
131 static inline
sort_strings(std::vector<const unsigned char * > & strings,size_t memory=0)132 void sort_strings(std::vector<const unsigned char*>& strings,
133 size_t memory = 0) {
134 return sort_strings(strings.data(), strings.size(), memory);
135 }
136
137 /******************************************************************************/
138
139 /*!
140 * Sort a set of std::strings in place.
141 *
142 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
143 * If the memory limit is non zero, possibly slower algorithms will be selected
144 * to stay within the memory limit.
145 */
146 static inline
sort_strings(std::string * strings,size_t size,size_t memory=0)147 void sort_strings(std::string* strings, size_t size, size_t memory = 0) {
148 sort_strings_detail::radixsort_CE3(
149 sort_strings_detail::StringPtr<sort_strings_detail::StdStringSet>(
150 sort_strings_detail::StdStringSet(strings, strings + size)),
151 /* depth */ 0, memory);
152 }
153
154 /*!
155 * Sort a vector of std::strings in place.
156 *
157 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
158 * If the memory limit is non zero, possibly slower algorithms will be selected
159 * to stay within the memory limit.
160 */
161 static inline
sort_strings(std::vector<std::string> & strings,size_t memory=0)162 void sort_strings(std::vector<std::string>& strings, size_t memory = 0) {
163 return sort_strings(strings.data(), strings.size(), memory);
164 }
165
166 /******************************************************************************/
167 /******************************************************************************/
168 /******************************************************************************/
169
170 /*!
171 * Sort a set of strings represented by C-style uint8_t* in place.
172 *
173 * If the memory limit is non zero, possibly slower algorithms will be selected
174 * to stay within the memory limit.
175 */
176 static inline
sort_strings_lcp(unsigned char ** strings,size_t size,uint32_t * lcp,size_t memory=0)177 void sort_strings_lcp(unsigned char** strings, size_t size, uint32_t* lcp,
178 size_t memory = 0) {
179 sort_strings_detail::radixsort_CE3(
180 sort_strings_detail::StringLcpPtr<
181 sort_strings_detail::UCharStringSet, uint32_t>(
182 sort_strings_detail::UCharStringSet(strings, strings + size), lcp),
183 /* depth */ 0, memory);
184 }
185
186 /*!
187 * Sort a set of strings represented by C-style char* in place.
188 *
189 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
190 * If the memory limit is non zero, possibly slower algorithms will be selected
191 * to stay within the memory limit.
192 */
193 static inline
sort_strings_lcp(char ** strings,size_t size,uint32_t * lcp,size_t memory=0)194 void sort_strings_lcp(char** strings, size_t size, uint32_t* lcp,
195 size_t memory = 0) {
196 return sort_strings_lcp(
197 reinterpret_cast<unsigned char**>(strings), size, lcp, memory);
198 }
199
200 /*!
201 * Sort a set of strings represented by C-style uint8_t* in place.
202 *
203 * If the memory limit is non zero, possibly slower algorithms will be selected
204 * to stay within the memory limit.
205 */
206 static inline
sort_strings_lcp(const unsigned char ** strings,size_t size,uint32_t * lcp,size_t memory=0)207 void sort_strings_lcp(const unsigned char** strings, size_t size, uint32_t* lcp,
208 size_t memory = 0) {
209 sort_strings_detail::radixsort_CE3(
210 sort_strings_detail::StringLcpPtr<
211 sort_strings_detail::CUCharStringSet, uint32_t>(
212 sort_strings_detail::CUCharStringSet(strings, strings + size), lcp),
213 /* depth */ 0, memory);
214 }
215
216 /*!
217 * Sort a set of strings represented by C-style char* in place.
218 *
219 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
220 * If the memory limit is non zero, possibly slower algorithms will be selected
221 * to stay within the memory limit.
222 */
223 static inline
sort_strings_lcp(const char ** strings,size_t size,uint32_t * lcp,size_t memory=0)224 void sort_strings_lcp(const char** strings, size_t size, uint32_t* lcp,
225 size_t memory = 0) {
226 return sort_strings_lcp(
227 reinterpret_cast<const unsigned char**>(strings), size, lcp, memory);
228 }
229
230 /******************************************************************************/
231
232 /*!
233 * Sort a set of strings represented by C-style char* in place.
234 *
235 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
236 * If the memory limit is non zero, possibly slower algorithms will be selected
237 * to stay within the memory limit.
238 */
239 static inline
sort_strings_lcp(std::vector<char * > & strings,uint32_t * lcp,size_t memory=0)240 void sort_strings_lcp(std::vector<char*>& strings, uint32_t* lcp,
241 size_t memory = 0) {
242 return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
243 }
244
245 /*!
246 * Sort a set of strings represented by C-style uint8_t* in place.
247 *
248 * If the memory limit is non zero, possibly slower algorithms will be selected
249 * to stay within the memory limit.
250 */
251 static inline
sort_strings_lcp(std::vector<unsigned char * > & strings,uint32_t * lcp,size_t memory=0)252 void sort_strings_lcp(std::vector<unsigned char*>& strings, uint32_t* lcp,
253 size_t memory = 0) {
254 return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
255 }
256
257 /*!
258 * Sort a set of strings represented by C-style char* in place.
259 *
260 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
261 * If the memory limit is non zero, possibly slower algorithms will be selected
262 * to stay within the memory limit.
263 */
264 static inline
sort_strings_lcp(std::vector<const char * > & strings,uint32_t * lcp,size_t memory=0)265 void sort_strings_lcp(std::vector<const char*>& strings, uint32_t* lcp,
266 size_t memory = 0) {
267 return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
268 }
269
270 /*!
271 * Sort a set of strings represented by C-style uint8_t* in place.
272 *
273 * If the memory limit is non zero, possibly slower algorithms will be selected
274 * to stay within the memory limit.
275 */
276 static inline
sort_strings_lcp(std::vector<const unsigned char * > & strings,uint32_t * lcp,size_t memory=0)277 void sort_strings_lcp(std::vector<const unsigned char*>& strings, uint32_t* lcp,
278 size_t memory = 0) {
279 return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
280 }
281
282 /******************************************************************************/
283
284 /*!
285 * Sort a set of std::strings in place.
286 *
287 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
288 * If the memory limit is non zero, possibly slower algorithms will be selected
289 * to stay within the memory limit.
290 */
291 static inline
sort_strings_lcp(std::string * strings,size_t size,uint32_t * lcp,size_t memory=0)292 void sort_strings_lcp(std::string* strings, size_t size, uint32_t* lcp,
293 size_t memory = 0) {
294 sort_strings_detail::radixsort_CE3(
295 sort_strings_detail::StringLcpPtr<
296 sort_strings_detail::StdStringSet, uint32_t>(
297 sort_strings_detail::StdStringSet(strings, strings + size), lcp),
298 /* depth */ 0, memory);
299 }
300
301 /*!
302 * Sort a vector of std::strings in place.
303 *
304 * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
305 * If the memory limit is non zero, possibly slower algorithms will be selected
306 * to stay within the memory limit.
307 */
308 static inline
sort_strings_lcp(std::vector<std::string> & strings,uint32_t * lcp,size_t memory=0)309 void sort_strings_lcp(std::vector<std::string>& strings, uint32_t* lcp,
310 size_t memory = 0) {
311 return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
312 }
313
314 /******************************************************************************/
315
316 //! \}
317 //! \}
318
319 } // namespace tlx
320
321 #endif // !TLX_SORT_STRINGS_HEADER
322
323 /******************************************************************************/
324