1 /*******************************************************************************
2  * tlx/sort/strings.hpp
3  *
4  * Front-end for string sorting algorithms.
5  *
6  * Part of tlx - http://panthema.net/tlx
7  *
8  * Copyright (C) 2018 Timo Bingmann <tb@panthema.net>
9  *
10  * All rights reserved. Published under the Boost Software License, Version 1.0
11  ******************************************************************************/
12 
13 #ifndef TLX_SORT_STRINGS_HEADER
14 #define TLX_SORT_STRINGS_HEADER
15 
16 #include <tlx/sort/strings/insertion_sort.hpp>
17 #include <tlx/sort/strings/multikey_quicksort.hpp>
18 #include <tlx/sort/strings/radix_sort.hpp>
19 
20 #include <cstdint>
21 #include <string>
22 #include <vector>
23 
24 namespace tlx {
25 
26 //! \addtogroup tlx_sort
27 //! \{
28 //! \name String Sorting Algorithms
29 //! \{
30 
31 /******************************************************************************/
32 
33 /*!
34  * Sort a set of strings represented by C-style uint8_t* in place.
35  *
36  * If the memory limit is non zero, possibly slower algorithms will be selected
37  * to stay within the memory limit.
38  */
39 static inline
sort_strings(unsigned char ** strings,size_t size,size_t memory=0)40 void sort_strings(unsigned char** strings, size_t size, size_t memory = 0) {
41     sort_strings_detail::radixsort_CE3(
42         sort_strings_detail::StringPtr<sort_strings_detail::UCharStringSet>(
43             sort_strings_detail::UCharStringSet(strings, strings + size)),
44         /* depth */ 0, memory);
45 }
46 
47 /*!
48  * Sort a set of strings represented by C-style char* in place.
49  *
50  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
51  * If the memory limit is non zero, possibly slower algorithms will be selected
52  * to stay within the memory limit.
53  */
54 static inline
sort_strings(char ** strings,size_t size,size_t memory=0)55 void sort_strings(char** strings, size_t size, size_t memory = 0) {
56     return sort_strings(
57         reinterpret_cast<unsigned char**>(strings), size, memory);
58 }
59 
60 /*!
61  * Sort a set of strings represented by C-style uint8_t* in place.
62  *
63  * If the memory limit is non zero, possibly slower algorithms will be selected
64  * to stay within the memory limit.
65  */
66 static inline
sort_strings(const unsigned char ** strings,size_t size,size_t memory=0)67 void sort_strings(const unsigned char** strings, size_t size,
68                   size_t memory = 0) {
69     sort_strings_detail::radixsort_CE3(
70         sort_strings_detail::StringPtr<sort_strings_detail::CUCharStringSet>(
71             sort_strings_detail::CUCharStringSet(strings, strings + size)),
72         /* depth */ 0, memory);
73 }
74 
75 /*!
76  * Sort a set of strings represented by C-style char* in place.
77  *
78  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
79  * If the memory limit is non zero, possibly slower algorithms will be selected
80  * to stay within the memory limit.
81  */
82 static inline
sort_strings(const char ** strings,size_t size,size_t memory=0)83 void sort_strings(const char** strings, size_t size, size_t memory = 0) {
84     return sort_strings(
85         reinterpret_cast<const unsigned char**>(strings), size, memory);
86 }
87 
88 /******************************************************************************/
89 
90 /*!
91  * Sort a set of strings represented by C-style char* in place.
92  *
93  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
94  * If the memory limit is non zero, possibly slower algorithms will be selected
95  * to stay within the memory limit.
96  */
97 static inline
sort_strings(std::vector<char * > & strings,size_t memory=0)98 void sort_strings(std::vector<char*>& strings, size_t memory = 0) {
99     return sort_strings(strings.data(), strings.size(), memory);
100 }
101 
102 /*!
103  * Sort a set of strings represented by C-style uint8_t* in place.
104  *
105  * If the memory limit is non zero, possibly slower algorithms will be selected
106  * to stay within the memory limit.
107  */
108 static inline
sort_strings(std::vector<unsigned char * > & strings,size_t memory=0)109 void sort_strings(std::vector<unsigned char*>& strings, size_t memory = 0) {
110     return sort_strings(strings.data(), strings.size(), memory);
111 }
112 
113 /*!
114  * Sort a set of strings represented by C-style char* in place.
115  *
116  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
117  * If the memory limit is non zero, possibly slower algorithms will be selected
118  * to stay within the memory limit.
119  */
120 static inline
sort_strings(std::vector<const char * > & strings,size_t memory=0)121 void sort_strings(std::vector<const char*>& strings, size_t memory = 0) {
122     return sort_strings(strings.data(), strings.size(), memory);
123 }
124 
125 /*!
126  * Sort a set of strings represented by C-style uint8_t* in place.
127  *
128  * If the memory limit is non zero, possibly slower algorithms will be selected
129  * to stay within the memory limit.
130  */
131 static inline
sort_strings(std::vector<const unsigned char * > & strings,size_t memory=0)132 void sort_strings(std::vector<const unsigned char*>& strings,
133                   size_t memory = 0) {
134     return sort_strings(strings.data(), strings.size(), memory);
135 }
136 
137 /******************************************************************************/
138 
139 /*!
140  * Sort a set of std::strings in place.
141  *
142  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
143  * If the memory limit is non zero, possibly slower algorithms will be selected
144  * to stay within the memory limit.
145  */
146 static inline
sort_strings(std::string * strings,size_t size,size_t memory=0)147 void sort_strings(std::string* strings, size_t size, size_t memory = 0) {
148     sort_strings_detail::radixsort_CE3(
149         sort_strings_detail::StringPtr<sort_strings_detail::StdStringSet>(
150             sort_strings_detail::StdStringSet(strings, strings + size)),
151         /* depth */ 0, memory);
152 }
153 
154 /*!
155  * Sort a vector of std::strings in place.
156  *
157  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
158  * If the memory limit is non zero, possibly slower algorithms will be selected
159  * to stay within the memory limit.
160  */
161 static inline
sort_strings(std::vector<std::string> & strings,size_t memory=0)162 void sort_strings(std::vector<std::string>& strings, size_t memory = 0) {
163     return sort_strings(strings.data(), strings.size(), memory);
164 }
165 
166 /******************************************************************************/
167 /******************************************************************************/
168 /******************************************************************************/
169 
170 /*!
171  * Sort a set of strings represented by C-style uint8_t* in place.
172  *
173  * If the memory limit is non zero, possibly slower algorithms will be selected
174  * to stay within the memory limit.
175  */
176 static inline
sort_strings_lcp(unsigned char ** strings,size_t size,uint32_t * lcp,size_t memory=0)177 void sort_strings_lcp(unsigned char** strings, size_t size, uint32_t* lcp,
178                       size_t memory = 0) {
179     sort_strings_detail::radixsort_CE3(
180         sort_strings_detail::StringLcpPtr<
181             sort_strings_detail::UCharStringSet, uint32_t>(
182             sort_strings_detail::UCharStringSet(strings, strings + size), lcp),
183         /* depth */ 0, memory);
184 }
185 
186 /*!
187  * Sort a set of strings represented by C-style char* in place.
188  *
189  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
190  * If the memory limit is non zero, possibly slower algorithms will be selected
191  * to stay within the memory limit.
192  */
193 static inline
sort_strings_lcp(char ** strings,size_t size,uint32_t * lcp,size_t memory=0)194 void sort_strings_lcp(char** strings, size_t size, uint32_t* lcp,
195                       size_t memory = 0) {
196     return sort_strings_lcp(
197         reinterpret_cast<unsigned char**>(strings), size, lcp, memory);
198 }
199 
200 /*!
201  * Sort a set of strings represented by C-style uint8_t* in place.
202  *
203  * If the memory limit is non zero, possibly slower algorithms will be selected
204  * to stay within the memory limit.
205  */
206 static inline
sort_strings_lcp(const unsigned char ** strings,size_t size,uint32_t * lcp,size_t memory=0)207 void sort_strings_lcp(const unsigned char** strings, size_t size, uint32_t* lcp,
208                       size_t memory = 0) {
209     sort_strings_detail::radixsort_CE3(
210         sort_strings_detail::StringLcpPtr<
211             sort_strings_detail::CUCharStringSet, uint32_t>(
212             sort_strings_detail::CUCharStringSet(strings, strings + size), lcp),
213         /* depth */ 0, memory);
214 }
215 
216 /*!
217  * Sort a set of strings represented by C-style char* in place.
218  *
219  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
220  * If the memory limit is non zero, possibly slower algorithms will be selected
221  * to stay within the memory limit.
222  */
223 static inline
sort_strings_lcp(const char ** strings,size_t size,uint32_t * lcp,size_t memory=0)224 void sort_strings_lcp(const char** strings, size_t size, uint32_t* lcp,
225                       size_t memory = 0) {
226     return sort_strings_lcp(
227         reinterpret_cast<const unsigned char**>(strings), size, lcp, memory);
228 }
229 
230 /******************************************************************************/
231 
232 /*!
233  * Sort a set of strings represented by C-style char* in place.
234  *
235  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
236  * If the memory limit is non zero, possibly slower algorithms will be selected
237  * to stay within the memory limit.
238  */
239 static inline
sort_strings_lcp(std::vector<char * > & strings,uint32_t * lcp,size_t memory=0)240 void sort_strings_lcp(std::vector<char*>& strings, uint32_t* lcp,
241                       size_t memory = 0) {
242     return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
243 }
244 
245 /*!
246  * Sort a set of strings represented by C-style uint8_t* in place.
247  *
248  * If the memory limit is non zero, possibly slower algorithms will be selected
249  * to stay within the memory limit.
250  */
251 static inline
sort_strings_lcp(std::vector<unsigned char * > & strings,uint32_t * lcp,size_t memory=0)252 void sort_strings_lcp(std::vector<unsigned char*>& strings, uint32_t* lcp,
253                       size_t memory = 0) {
254     return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
255 }
256 
257 /*!
258  * Sort a set of strings represented by C-style char* in place.
259  *
260  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
261  * If the memory limit is non zero, possibly slower algorithms will be selected
262  * to stay within the memory limit.
263  */
264 static inline
sort_strings_lcp(std::vector<const char * > & strings,uint32_t * lcp,size_t memory=0)265 void sort_strings_lcp(std::vector<const char*>& strings, uint32_t* lcp,
266                       size_t memory = 0) {
267     return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
268 }
269 
270 /*!
271  * Sort a set of strings represented by C-style uint8_t* in place.
272  *
273  * If the memory limit is non zero, possibly slower algorithms will be selected
274  * to stay within the memory limit.
275  */
276 static inline
sort_strings_lcp(std::vector<const unsigned char * > & strings,uint32_t * lcp,size_t memory=0)277 void sort_strings_lcp(std::vector<const unsigned char*>& strings, uint32_t* lcp,
278                       size_t memory = 0) {
279     return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
280 }
281 
282 /******************************************************************************/
283 
284 /*!
285  * Sort a set of std::strings in place.
286  *
287  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
288  * If the memory limit is non zero, possibly slower algorithms will be selected
289  * to stay within the memory limit.
290  */
291 static inline
sort_strings_lcp(std::string * strings,size_t size,uint32_t * lcp,size_t memory=0)292 void sort_strings_lcp(std::string* strings, size_t size, uint32_t* lcp,
293                       size_t memory = 0) {
294     sort_strings_detail::radixsort_CE3(
295         sort_strings_detail::StringLcpPtr<
296             sort_strings_detail::StdStringSet, uint32_t>(
297             sort_strings_detail::StdStringSet(strings, strings + size), lcp),
298         /* depth */ 0, memory);
299 }
300 
301 /*!
302  * Sort a vector of std::strings in place.
303  *
304  * The strings are sorted as _unsigned_ 8-bit characters, not signed characters!
305  * If the memory limit is non zero, possibly slower algorithms will be selected
306  * to stay within the memory limit.
307  */
308 static inline
sort_strings_lcp(std::vector<std::string> & strings,uint32_t * lcp,size_t memory=0)309 void sort_strings_lcp(std::vector<std::string>& strings, uint32_t* lcp,
310                       size_t memory = 0) {
311     return sort_strings_lcp(strings.data(), strings.size(), lcp, memory);
312 }
313 
314 /******************************************************************************/
315 
316 //! \}
317 //! \}
318 
319 } // namespace tlx
320 
321 #endif // !TLX_SORT_STRINGS_HEADER
322 
323 /******************************************************************************/
324