1 /* $OpenBSD: vsort.c,v 1.2 2015/04/01 21:46:38 millert Exp $ */ 2 3 /*- 4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/types.h> 31 32 #include <ctype.h> 33 #include <stdlib.h> 34 #include <string.h> 35 36 #include "sort.h" 37 #include "vsort.h" 38 39 static inline bool 40 isdigit_clocale(wchar_t c) 41 { 42 return (c >= L'0' && c <= L'9'); 43 } 44 45 static inline bool 46 isalpha_clocale(wchar_t c) 47 { 48 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z')); 49 } 50 51 static inline bool 52 isalnum_clocale(wchar_t c) 53 { 54 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || 55 (c >= L'0' && c <= L'9')); 56 } 57 58 /* 59 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$ 60 * Set length of string before suffix. 61 */ 62 static void 63 find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len) 64 { 65 wchar_t c; 66 size_t clen; 67 bool expect_alpha, sfx; 68 69 sfx = false; 70 expect_alpha = false; 71 *len = 0; 72 clen = 0; 73 74 while ((si < se) && (c = bws_get_iter_value(si))) { 75 if (expect_alpha) { 76 expect_alpha = false; 77 if (!isalpha_clocale(c) && (c != L'~')) 78 sfx = false; 79 } else if (c == L'.') { 80 expect_alpha = true; 81 if (!sfx) { 82 sfx = true; 83 *len = clen; 84 } 85 } else if (!isalnum_clocale(c) && (c != L'~')) 86 sfx = false; 87 88 si = bws_iterator_inc(si, 1); 89 ++clen; 90 } 91 92 /* This code must be here to make the implementation compatible 93 * with WORDING of GNU sort documentation. 94 * But the GNU sort implementation is not following its own 95 * documentation. GNU sort allows empty file extensions 96 * (just dot with nothing after); but the regular expression in 97 * their documentation does not allow empty file extensions. 98 * We chose to make our implementation compatible with GNU sort 99 * implementation. If they will ever fix their bug, this code 100 * must be uncommented. Or they may choose to fix the info page, 101 * then the code stays commented. 102 * 103 if (expect_alpha) 104 sfx = false; 105 */ 106 107 if (!sfx) 108 *len = clen; 109 } 110 111 static inline int 112 cmp_chars(wchar_t c1, wchar_t c2) 113 { 114 if (c1 == c2) 115 return 0; 116 117 if (c1 == L'~') 118 return -1; 119 if (c2 == L'~') 120 return 1; 121 122 if (isdigit_clocale(c1) || !c1) 123 return (isdigit_clocale(c2) || !c2) ? 0 : -1; 124 125 if (isdigit_clocale(c2) || !c2) 126 return 1; 127 128 if (isalpha_clocale(c1)) 129 return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1; 130 131 if (isalpha_clocale(c2)) 132 return 1; 133 134 return (int)c1 - (int)c2; 135 } 136 137 static int 138 cmpversions(bwstring_iterator si1, bwstring_iterator se1, 139 bwstring_iterator si2, bwstring_iterator se2) 140 { 141 int cmp, diff; 142 143 while ((si1 < se1) || (si2 < se2)) { 144 diff = 0; 145 146 while (((si1 < se1) && 147 !isdigit_clocale(bws_get_iter_value(si1))) || 148 ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) { 149 wchar_t c1, c2; 150 151 c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0; 152 c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0; 153 154 cmp = cmp_chars(c1, c2); 155 if (cmp) 156 return cmp; 157 158 if (si1 < se1) 159 si1 = bws_iterator_inc(si1, 1); 160 if (si2 < se2) 161 si2 = bws_iterator_inc(si2, 1); 162 } 163 164 while (bws_get_iter_value(si1) == L'0') 165 si1 = bws_iterator_inc(si1, 1); 166 167 while (bws_get_iter_value(si2) == L'0') 168 si2 = bws_iterator_inc(si2, 1); 169 170 while (isdigit_clocale(bws_get_iter_value(si1)) && 171 isdigit_clocale(bws_get_iter_value(si2))) { 172 if (!diff) 173 diff = ((int)bws_get_iter_value(si1) - 174 (int)bws_get_iter_value(si2)); 175 si1 = bws_iterator_inc(si1, 1); 176 si2 = bws_iterator_inc(si2, 1); 177 } 178 179 if (isdigit_clocale(bws_get_iter_value(si1))) 180 return 1; 181 182 if (isdigit_clocale(bws_get_iter_value(si2))) 183 return -1; 184 185 if (diff) 186 return diff; 187 } 188 189 return 0; 190 } 191 192 /* 193 * Compare two version strings 194 */ 195 int 196 vcmp(struct bwstring *s1, struct bwstring *s2) 197 { 198 bwstring_iterator si1, si2; 199 wchar_t c1, c2; 200 size_t len1, len2, slen1, slen2; 201 int cmp_bytes, cmp_res; 202 203 if (s1 == s2) 204 return 0; 205 206 cmp_bytes = bwscmp(s1, s2, 0); 207 if (cmp_bytes == 0) 208 return 0; 209 210 len1 = slen1 = BWSLEN(s1); 211 len2 = slen2 = BWSLEN(s2); 212 213 if (slen1 < 1) 214 return -1; 215 if (slen2 < 1) 216 return 1; 217 218 si1 = bws_begin(s1); 219 si2 = bws_begin(s2); 220 221 c1 = bws_get_iter_value(si1); 222 c2 = bws_get_iter_value(si2); 223 224 if (c1 == L'.' && (slen1 == 1)) 225 return -1; 226 227 if (c2 == L'.' && (slen2 == 1)) 228 return 1; 229 230 if (slen1 == 2 && c1 == L'.' && 231 bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.') 232 return -1; 233 if (slen2 == 2 && c2 == L'.' && 234 bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.') 235 return 1; 236 237 if (c1 == L'.' && c2 != L'.') 238 return -1; 239 if (c1 != L'.' && c2 == L'.') 240 return 1; 241 242 if (c1 == L'.' && c2 == L'.') { 243 si1 = bws_iterator_inc(si1, 1); 244 si2 = bws_iterator_inc(si2, 1); 245 } 246 247 find_suffix(si1, bws_end(s1), &len1); 248 find_suffix(si2, bws_end(s2), &len2); 249 250 if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0)) 251 return cmp_bytes; 252 253 cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2, 254 bws_iterator_inc(si2, len2)); 255 256 if (cmp_res == 0) 257 cmp_res = cmp_bytes; 258 259 return cmp_res; 260 } 261