1c66bbc91SGabor Kovesdan /*- 2c66bbc91SGabor Kovesdan * Copyright (C) 2012 Oleg Moskalenko <oleg.moskalenko@citrix.com> 3c66bbc91SGabor Kovesdan * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> 4c66bbc91SGabor Kovesdan * All rights reserved. 5c66bbc91SGabor Kovesdan * 6c66bbc91SGabor Kovesdan * Redistribution and use in source and binary forms, with or without 7c66bbc91SGabor Kovesdan * modification, are permitted provided that the following conditions 8c66bbc91SGabor Kovesdan * are met: 9c66bbc91SGabor Kovesdan * 1. Redistributions of source code must retain the above copyright 10c66bbc91SGabor Kovesdan * notice, this list of conditions and the following disclaimer. 11c66bbc91SGabor Kovesdan * 2. Redistributions in binary form must reproduce the above copyright 12c66bbc91SGabor Kovesdan * notice, this list of conditions and the following disclaimer in the 13c66bbc91SGabor Kovesdan * documentation and/or other materials provided with the distribution. 14c66bbc91SGabor Kovesdan * 15c66bbc91SGabor Kovesdan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16c66bbc91SGabor Kovesdan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17c66bbc91SGabor Kovesdan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18c66bbc91SGabor Kovesdan * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19c66bbc91SGabor Kovesdan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20c66bbc91SGabor Kovesdan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21c66bbc91SGabor Kovesdan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22c66bbc91SGabor Kovesdan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23c66bbc91SGabor Kovesdan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24c66bbc91SGabor Kovesdan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25c66bbc91SGabor Kovesdan * SUCH DAMAGE. 26c66bbc91SGabor Kovesdan */ 27c66bbc91SGabor Kovesdan 28c66bbc91SGabor Kovesdan #include <sys/cdefs.h> 29c66bbc91SGabor Kovesdan __FBSDID("$FreeBSD$"); 30c66bbc91SGabor Kovesdan 31c66bbc91SGabor Kovesdan #include <sys/types.h> 32c66bbc91SGabor Kovesdan 33c66bbc91SGabor Kovesdan #include <ctype.h> 34c66bbc91SGabor Kovesdan #include <stdlib.h> 35c66bbc91SGabor Kovesdan #include <string.h> 36c66bbc91SGabor Kovesdan 37c66bbc91SGabor Kovesdan #include "sort.h" 38c66bbc91SGabor Kovesdan #include "vsort.h" 39c66bbc91SGabor Kovesdan 40c66bbc91SGabor Kovesdan static inline bool 41c66bbc91SGabor Kovesdan isdigit_clocale(wchar_t c) 42c66bbc91SGabor Kovesdan { 43c66bbc91SGabor Kovesdan 44c66bbc91SGabor Kovesdan return (c >= L'0' && c <= L'9'); 45c66bbc91SGabor Kovesdan } 46c66bbc91SGabor Kovesdan 47c66bbc91SGabor Kovesdan static inline bool 48c66bbc91SGabor Kovesdan isalpha_clocale(wchar_t c) 49c66bbc91SGabor Kovesdan { 50c66bbc91SGabor Kovesdan 51c66bbc91SGabor Kovesdan return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); 52c66bbc91SGabor Kovesdan } 53c66bbc91SGabor Kovesdan 54c66bbc91SGabor Kovesdan static inline bool 55c66bbc91SGabor Kovesdan isalnum_clocale(wchar_t c) 56c66bbc91SGabor Kovesdan { 57c66bbc91SGabor Kovesdan 58c66bbc91SGabor Kovesdan return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || 59c66bbc91SGabor Kovesdan (c >= L'0' && c <= L'9')); 60c66bbc91SGabor Kovesdan } 61c66bbc91SGabor Kovesdan 62c66bbc91SGabor Kovesdan /* 63c66bbc91SGabor Kovesdan * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ 64c66bbc91SGabor Kovesdan * Set length of string before suffix. 65c66bbc91SGabor Kovesdan */ 66c66bbc91SGabor Kovesdan static void 67c66bbc91SGabor Kovesdan find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) 68c66bbc91SGabor Kovesdan { 69c66bbc91SGabor Kovesdan wchar_t c; 70c66bbc91SGabor Kovesdan size_t clen; 71c66bbc91SGabor Kovesdan bool expect_alpha, sfx; 72c66bbc91SGabor Kovesdan 73c66bbc91SGabor Kovesdan sfx = false; 74c66bbc91SGabor Kovesdan expect_alpha = false; 75c66bbc91SGabor Kovesdan *len = 0; 76c66bbc91SGabor Kovesdan clen = 0; 77c66bbc91SGabor Kovesdan 78c66bbc91SGabor Kovesdan while ((si < se) && (c = bws_get_iter_value(si))) { 79c66bbc91SGabor Kovesdan if (expect_alpha) { 80c66bbc91SGabor Kovesdan expect_alpha = false; 81c66bbc91SGabor Kovesdan if (!isalpha_clocale(c) && (c != L'~')) 82c66bbc91SGabor Kovesdan sfx = false; 83c66bbc91SGabor Kovesdan } else if (c == L'.') { 84c66bbc91SGabor Kovesdan expect_alpha = true; 85c66bbc91SGabor Kovesdan if (!sfx) { 86c66bbc91SGabor Kovesdan sfx = true; 87c66bbc91SGabor Kovesdan *len = clen; 88c66bbc91SGabor Kovesdan } 89c66bbc91SGabor Kovesdan } else if (!isalnum_clocale(c) && (c != L'~')) 90c66bbc91SGabor Kovesdan sfx = false; 91c66bbc91SGabor Kovesdan 92c66bbc91SGabor Kovesdan si = bws_iterator_inc(si, 1); 93c66bbc91SGabor Kovesdan ++clen; 94c66bbc91SGabor Kovesdan } 95c66bbc91SGabor Kovesdan 96c66bbc91SGabor Kovesdan /* This code must be here to make the implementation compatible 97c66bbc91SGabor Kovesdan * with WORDING of GNU sort documentation. 98c66bbc91SGabor Kovesdan * But the GNU sort implementation is not following its own 99c66bbc91SGabor Kovesdan * documentation. GNU sort allows empty file extensions 100c66bbc91SGabor Kovesdan * (just dot with nothing after); but the regular expression in 101c66bbc91SGabor Kovesdan * their documentation does not allow empty file extensions. 102c66bbc91SGabor Kovesdan * We chose to make our implementation compatible with GNU sort 103c66bbc91SGabor Kovesdan * implementation. If they will ever fix their bug, this code 104c66bbc91SGabor Kovesdan * must be uncommented. Or they may choose to fix the info page, 105c66bbc91SGabor Kovesdan * then the code stays commented. 106c66bbc91SGabor Kovesdan * 107c66bbc91SGabor Kovesdan if (expect_alpha) 108c66bbc91SGabor Kovesdan sfx = false; 109c66bbc91SGabor Kovesdan */ 110c66bbc91SGabor Kovesdan 111c66bbc91SGabor Kovesdan if (!sfx) 112c66bbc91SGabor Kovesdan *len = clen; 113c66bbc91SGabor Kovesdan } 114c66bbc91SGabor Kovesdan 115c66bbc91SGabor Kovesdan static inline int 116c66bbc91SGabor Kovesdan cmp_chars(wchar_t c1, wchar_t c2) 117c66bbc91SGabor Kovesdan { 118c66bbc91SGabor Kovesdan 119c66bbc91SGabor Kovesdan if (c1 == c2) 120c66bbc91SGabor Kovesdan return (0); 121c66bbc91SGabor Kovesdan 122c66bbc91SGabor Kovesdan if (c1 == L'~') 123c66bbc91SGabor Kovesdan return (-1); 124c66bbc91SGabor Kovesdan if (c2 == L'~') 125c66bbc91SGabor Kovesdan return (+1); 126c66bbc91SGabor Kovesdan 127c66bbc91SGabor Kovesdan if (isdigit_clocale(c1) || !c1) 128c66bbc91SGabor Kovesdan return ((isdigit_clocale(c2) || !c2) ? 0 : -1); 129c66bbc91SGabor Kovesdan 130c66bbc91SGabor Kovesdan if (isdigit_clocale(c2) || !c2) 131c66bbc91SGabor Kovesdan return (+1); 132c66bbc91SGabor Kovesdan 133c66bbc91SGabor Kovesdan if (isalpha_clocale(c1)) 134c66bbc91SGabor Kovesdan return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1); 135c66bbc91SGabor Kovesdan 136c66bbc91SGabor Kovesdan if (isalpha_clocale(c2)) 137c66bbc91SGabor Kovesdan return (+1); 138c66bbc91SGabor Kovesdan 139c66bbc91SGabor Kovesdan return ((int) c1 - (int) c2); 140c66bbc91SGabor Kovesdan } 141c66bbc91SGabor Kovesdan 142c66bbc91SGabor Kovesdan static int 143c66bbc91SGabor Kovesdan cmpversions(bwstring_iterator si1, bwstring_iterator se1, 144c66bbc91SGabor Kovesdan bwstring_iterator si2, bwstring_iterator se2) 145c66bbc91SGabor Kovesdan { 146c66bbc91SGabor Kovesdan int cmp, diff; 147c66bbc91SGabor Kovesdan 148c66bbc91SGabor Kovesdan while ((si1 < se1) || (si2 < se2)) { 149c66bbc91SGabor Kovesdan diff = 0; 150c66bbc91SGabor Kovesdan 151c66bbc91SGabor Kovesdan while (((si1 < se1) && 152c66bbc91SGabor Kovesdan !isdigit_clocale(bws_get_iter_value(si1))) || 153c66bbc91SGabor Kovesdan ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { 154c66bbc91SGabor Kovesdan wchar_t c1, c2; 155c66bbc91SGabor Kovesdan 156c66bbc91SGabor Kovesdan c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; 157c66bbc91SGabor Kovesdan c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; 158c66bbc91SGabor Kovesdan 159c66bbc91SGabor Kovesdan cmp = cmp_chars(c1, c2); 160c66bbc91SGabor Kovesdan if (cmp) 161c66bbc91SGabor Kovesdan return (cmp); 162c66bbc91SGabor Kovesdan 163c66bbc91SGabor Kovesdan if (si1 < se1) 164c66bbc91SGabor Kovesdan si1 = bws_iterator_inc(si1, 1); 165c66bbc91SGabor Kovesdan if (si2 < se2) 166c66bbc91SGabor Kovesdan si2 = bws_iterator_inc(si2, 1); 167c66bbc91SGabor Kovesdan } 168c66bbc91SGabor Kovesdan 169c66bbc91SGabor Kovesdan while (bws_get_iter_value(si1) == L'0') 170c66bbc91SGabor Kovesdan si1 = bws_iterator_inc(si1, 1); 171c66bbc91SGabor Kovesdan 172c66bbc91SGabor Kovesdan while (bws_get_iter_value(si2) == L'0') 173c66bbc91SGabor Kovesdan si2 = bws_iterator_inc(si2, 1); 174c66bbc91SGabor Kovesdan 175c66bbc91SGabor Kovesdan while (isdigit_clocale(bws_get_iter_value(si1)) && 176c66bbc91SGabor Kovesdan isdigit_clocale(bws_get_iter_value(si2))) { 177c66bbc91SGabor Kovesdan if (!diff) 178c66bbc91SGabor Kovesdan diff = ((int)bws_get_iter_value(si1) - 179c66bbc91SGabor Kovesdan (int)bws_get_iter_value(si2)); 180c66bbc91SGabor Kovesdan si1 = bws_iterator_inc(si1, 1); 181c66bbc91SGabor Kovesdan si2 = bws_iterator_inc(si2, 1); 182c66bbc91SGabor Kovesdan } 183c66bbc91SGabor Kovesdan 184c66bbc91SGabor Kovesdan if (isdigit_clocale(bws_get_iter_value(si1))) 185c66bbc91SGabor Kovesdan return (1); 186c66bbc91SGabor Kovesdan 187c66bbc91SGabor Kovesdan if (isdigit_clocale(bws_get_iter_value(si2))) 188c66bbc91SGabor Kovesdan return (-1); 189c66bbc91SGabor Kovesdan 190c66bbc91SGabor Kovesdan if (diff) 191c66bbc91SGabor Kovesdan return (diff); 192c66bbc91SGabor Kovesdan } 193c66bbc91SGabor Kovesdan 194c66bbc91SGabor Kovesdan return (0); 195c66bbc91SGabor Kovesdan } 196c66bbc91SGabor Kovesdan 197c66bbc91SGabor Kovesdan /* 198c66bbc91SGabor Kovesdan * Compare two version strings 199c66bbc91SGabor Kovesdan */ 200c66bbc91SGabor Kovesdan int 201c66bbc91SGabor Kovesdan vcmp(struct bwstring *s1, struct bwstring *s2) 202c66bbc91SGabor Kovesdan { 203c66bbc91SGabor Kovesdan bwstring_iterator si1, si2; 204c66bbc91SGabor Kovesdan wchar_t c1, c2; 205c66bbc91SGabor Kovesdan size_t len1, len2, slen1, slen2; 206c66bbc91SGabor Kovesdan int cmp_bytes, cmp_res; 207c66bbc91SGabor Kovesdan 208c66bbc91SGabor Kovesdan if (s1 == s2) 209c66bbc91SGabor Kovesdan return (0); 210c66bbc91SGabor Kovesdan 211c66bbc91SGabor Kovesdan cmp_bytes = bwscmp(s1, s2, 0); 212c66bbc91SGabor Kovesdan if (cmp_bytes == 0) 213c66bbc91SGabor Kovesdan return (0); 214c66bbc91SGabor Kovesdan 215c66bbc91SGabor Kovesdan len1 = slen1 = BWSLEN(s1); 216c66bbc91SGabor Kovesdan len2 = slen2 = BWSLEN(s2); 217c66bbc91SGabor Kovesdan 218c66bbc91SGabor Kovesdan if (slen1 < 1) 219c66bbc91SGabor Kovesdan return (-1); 220c66bbc91SGabor Kovesdan if (slen2 < 1) 221c66bbc91SGabor Kovesdan return (+1); 222c66bbc91SGabor Kovesdan 223c66bbc91SGabor Kovesdan si1 = bws_begin(s1); 224c66bbc91SGabor Kovesdan si2 = bws_begin(s2); 225c66bbc91SGabor Kovesdan 226c66bbc91SGabor Kovesdan c1 = bws_get_iter_value(si1); 227c66bbc91SGabor Kovesdan c2 = bws_get_iter_value(si2); 228c66bbc91SGabor Kovesdan 229c66bbc91SGabor Kovesdan if (c1 == L'.' && (slen1 == 1)) 230c66bbc91SGabor Kovesdan return (-1); 231c66bbc91SGabor Kovesdan 232c66bbc91SGabor Kovesdan if (c2 == L'.' && (slen2 == 1)) 233c66bbc91SGabor Kovesdan return (+1); 234c66bbc91SGabor Kovesdan 235c66bbc91SGabor Kovesdan if (slen1 == 2 && c1 == L'.' && 236c66bbc91SGabor Kovesdan bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') 237c66bbc91SGabor Kovesdan return (-1); 238c66bbc91SGabor Kovesdan if (slen2 == 2 && c2 == L'.' && 239c66bbc91SGabor Kovesdan bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') 240c66bbc91SGabor Kovesdan return (+1); 241c66bbc91SGabor Kovesdan 242c66bbc91SGabor Kovesdan if (c1 == L'.' && c2 != L'.') 243c66bbc91SGabor Kovesdan return (-1); 244c66bbc91SGabor Kovesdan if (c1 != L'.' && c2 == L'.') 245c66bbc91SGabor Kovesdan return (+1); 246c66bbc91SGabor Kovesdan 247c66bbc91SGabor Kovesdan if (c1 == L'.' && c2 == L'.') { 248c66bbc91SGabor Kovesdan si1 = bws_iterator_inc(si1, 1); 249c66bbc91SGabor Kovesdan si2 = bws_iterator_inc(si2, 1); 250c66bbc91SGabor Kovesdan } 251c66bbc91SGabor Kovesdan 252c66bbc91SGabor Kovesdan find_suffix(si1, bws_end(s1), &len1); 253c66bbc91SGabor Kovesdan find_suffix(si2, bws_end(s2), &len2); 254c66bbc91SGabor Kovesdan 255c66bbc91SGabor Kovesdan if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) 256c66bbc91SGabor Kovesdan return (cmp_bytes); 257c66bbc91SGabor Kovesdan 258c66bbc91SGabor Kovesdan cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, 259c66bbc91SGabor Kovesdan bws_iterator_inc(si2, len2)); 260c66bbc91SGabor Kovesdan 261c66bbc91SGabor Kovesdan if (cmp_res == 0) 262c66bbc91SGabor Kovesdan cmp_res = cmp_bytes; 263c66bbc91SGabor Kovesdan 264c66bbc91SGabor Kovesdan return (cmp_res); 265c66bbc91SGabor Kovesdan } 266