/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Case Larsen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved.
 * @(#)uniq.c 8.3 (Berkeley) 5/4/95
 * $FreeBSD: head/usr.bin/uniq/uniq.c 263234 2014-03-16 11:04:44Z rwatson $
 */

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <nl_types.h>
#include <stdint.h>
#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>

/* Option flags: -c (count), -d (repeated only), -u (unique only), -i. */
static int cflag, dflag, uflag, iflag;
/*
 * numchars/numfields: leading characters/fields ignored when comparing.
 * repeats: how many extra times the current "previous" line has been seen.
 */
static int numchars, numfields, repeats;

static FILE	*file(const char *, const char *);
static wchar_t	*convert(const char *);
static int	 inlcmp(const char *, const char *);
static void	 show(FILE *, const char *);
static wchar_t	*skip(wchar_t *);
static void	 obsolete(char *[]);
static int	 skipcount(const char *, const char *);
static void	 usage(void);

/*
 * main --
 *	Read lines from the input, compare each line with its predecessor
 *	and write the lines selected by the -c/-d/-u flags to the output.
 *	Exits 0 on success and 1 (via err/errx/usage) on any failure,
 *	including a failure to write the output.
 */
int
main(int argc, char *argv[])
{
	wchar_t *tprev, *tthis;
	FILE *ifp, *ofp;
	int ch, comp;
	size_t prevbuflen, thisbuflen, b1;
	char *prevline, *thisline, *p;
	const char *ifn, *ofn;

	(void) setlocale(LC_ALL, "");

	/* Turn historic -N / +N arguments into -fN / -sN before getopt(). */
	obsolete(argv);
	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
		switch (ch) {
		case 'c':
			cflag = 1;
			break;
		case 'd':
			dflag = 1;
			break;
		case 'i':
			iflag = 1;
			break;
		case 'f':
			numfields = skipcount(optarg, "field");
			break;
		case 's':
			numchars = skipcount(optarg, "character");
			break;
		case 'u':
			uflag = 1;
			break;
		case '?':
		default:
			usage();
		}

	argc -= optind;
	argv += optind;

	/*
	 * -c cannot be combined with -d or -u.
	 * If no flags are set, default is -d -u.
	 */
	if (cflag) {
		if (dflag || uflag)
			usage();
	} else if (!dflag && !uflag)
		dflag = uflag = 1;

	if (argc > 2)
		usage();

	ifp = stdin;
	ifn = "stdin";
	ofp = stdout;
	ofn = "stdout";
	if (argc > 0 && strcmp(argv[0], "-") != 0)
		ifp = file(ifn = argv[0], "r");
	if (argc > 1)
		ofp = file(ofn = argv[1], "w");

	prevbuflen = thisbuflen = 0;
	prevline = thisline = NULL;

	/* Empty input: nothing to print. */
	if (getline(&prevline, &prevbuflen, ifp) < 0) {
		if (ferror(ifp))
			err(1, "%s", ifn);
		exit(0);
	}
	tprev = convert(prevline);

	/*
	 * With both -d and -u in effect (and no -c) every distinct line is
	 * printed, so it can be written as soon as it is first seen.
	 */
	if (!cflag && uflag && dflag)
		show(ofp, prevline);

	tthis = NULL;
	while (getline(&thisline, &thisbuflen, ifp) >= 0) {
		free(tthis);
		tthis = convert(thisline);

		/*
		 * A NULL conversion means the line is not a valid multibyte
		 * string.  Two such lines are compared byte-wise; a valid
		 * and an invalid line are always considered different.
		 */
		if (tthis == NULL && tprev == NULL)
			comp = inlcmp(thisline, prevline);
		else if (tthis == NULL || tprev == NULL)
			comp = 1;
		else
			comp = wcscoll(tthis, tprev);

		if (comp) {
			/* If different, print; set previous to new value. */
			if (cflag || !dflag || !uflag)
				show(ofp, prevline);
			/* Swap the two getline() buffers so both are reused. */
			p = prevline;
			b1 = prevbuflen;
			prevline = thisline;
			prevbuflen = thisbuflen;
			free(tprev);
			tprev = tthis;
			if (!cflag && uflag && dflag)
				show(ofp, prevline);
			thisline = p;
			thisbuflen = b1;
			/* Ownership of the wide copy moved to tprev. */
			tthis = NULL;
			repeats = 0;
		} else
			++repeats;
	}
	if (ferror(ifp))
		err(1, "%s", ifn);
	/* The last group's repeat count is only known at end of input. */
	if (cflag || !dflag || !uflag)
		show(ofp, prevline);
	/* Do not report success if the output could not be written. */
	if (fflush(ofp) != 0 || ferror(ofp))
		err(1, "%s", ofn);
	exit(0);
}

/*
 * skipcount --
 *	Parse the argument of -f or -s as a non-negative decimal integer
 *	that fits in an int.  `what' ("field" or "character") is only used
 *	in the diagnostic.  Exits with status 1 if the argument is empty,
 *	has trailing garbage, is negative or is out of range.
 */
static int
skipcount(const char *arg, const char *what)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val < 0 || val > INT_MAX)
		errx(1, "illegal %s skip value: %s", what, arg);
	return ((int)val);
}

/*
 * convert --
 *	Build the wide-character key used to compare a line: the line is
 *	converted with mbstowcs(), a trailing newline is removed, the
 *	fields/characters selected by -f/-s are skipped and, with -i, the
 *	result is folded to lower case.
 *
 *	Returns a malloc'ed string the caller must free, or NULL if the
 *	line cannot be converted in the current locale.  Allocation
 *	failures terminate the program.
 */
static wchar_t *
convert(const char *str)
{
	size_t n;
	wchar_t *buf, *ret, *p;

	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
		return (NULL);
	/* Guard the (n + 1) * sizeof(wchar_t) multiplication below. */
	if (SIZE_MAX / sizeof(*buf) < n + 1)
		errx(1, "conversion buffer length overflow");
	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
		err(1, "malloc");
	if (mbstowcs(buf, str, n + 1) != n)
		errx(1, "internal mbstowcs() error");
	/* The last line may not end with \n. */
	if (n > 0 && buf[n - 1] == L'\n')
		buf[n - 1] = L'\0';

	/* If requested get the chosen fields + character offsets. */
	if (numfields || numchars) {
		/* skip() points into buf, so copy before releasing buf. */
		if ((ret = wcsdup(skip(buf))) == NULL)
			err(1, "wcsdup");
		free(buf);
	} else
		ret = buf;

	if (iflag) {
		for (p = ret; *p != L'\0'; p++)
			*p = towlower(*p);
	}

	return (ret);
}

/*
 * inlcmp --
 *	Byte-wise comparison of two raw lines, used when neither of them
 *	could be converted to wide characters.  A newline at the first
 *	differing position is treated as end of string, so a final line
 *	without a newline still matches.  Returns 0 if the lines are equal
 *	and non-zero otherwise.  This function does not look at the
 *	-f, -s or -i settings.
 */
static int
inlcmp(const char *s1, const char *s2)
{
	int c1, c2;

	while (*s1 == *s2++)
		if (*s1++ == '\0')
			return (0);
	c1 = (unsigned char)*s1;
	c2 = (unsigned char)*(s2 - 1);
	/* The last line may not end with \n. */
	if (c1 == '\n')
		c1 = '\0';
	if (c2 == '\n')
		c2 = '\0';
	return (c1 - c2);
}

/*
 * show --
 *	Output a line depending on the flags and number of repetitions
 *	of the line.  With -c the line is prefixed by its count; otherwise
 *	it is written only if -d is set and it was repeated, or -u is set
 *	and it was not.
 */
static void
show(FILE *ofp, const char *str)
{

	if (cflag)
		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
	if ((dflag && repeats) || (uflag && !repeats))
		(void)fprintf(ofp, "%s", str);
}

/*
 * skip --
 *	Return a pointer into str just past the first numfields fields
 *	(each field being optional blanks followed by non-blanks) and then
 *	past numchars further characters, stopping early at end of string.
 */
static wchar_t *
skip(wchar_t *str)
{
	int nchars, nfields;

	for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
		while (iswblank(*str))
			str++;
		while (*str != L'\0' && !iswblank(*str))
			str++;
	}
	for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
		;
	return (str);
}

/*
 * file --
 *	fopen() wrapper that terminates the program with a diagnostic
 *	naming the file if it cannot be opened.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *fp;

	if ((fp = fopen(name, mode)) == NULL)
		err(1, "%s", name);
	return (fp);
}

/*
 * obsolete --
 *	Rewrite old-style numeric options in place so getopt() can parse
 *	them: "-N" becomes "-fN" and "+N" becomes "-sN".  Scanning stops at
 *	an argument starting with "--" or at the first argument that starts
 *	with neither '-' nor '+'.
 */
static void
obsolete(char *argv[])
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv)) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}

/*
 * usage --
 *	Print the synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	(void)fprintf(stderr,
"usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
	exit(1);
}