1 /* $OpenBSD: uniq.c,v 1.29 2021/11/17 23:09:38 cheloha Exp $ */ 2 /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Case Larsen. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <locale.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 #include <wchar.h> 46 #include <wctype.h> 47 48 int cflag, dflag, iflag, uflag; 49 int numchars, numfields, repeats; 50 51 FILE *file(char *, char *); 52 void show(FILE *, char *); 53 char *skip(char *); 54 void obsolete(char *[]); 55 __dead void usage(void); 56 57 int 58 main(int argc, char *argv[]) 59 { 60 char *prevline, *t1, *t2, *thisline; 61 FILE *ifp = NULL, *ofp = NULL; 62 size_t prevsize, thissize, tmpsize; 63 ssize_t len; 64 int ch; 65 66 setlocale(LC_CTYPE, ""); 67 68 if (pledge("stdio rpath wpath cpath", NULL) == -1) 69 err(1, "pledge"); 70 71 obsolete(argv); 72 while ((ch = getopt(argc, argv, "cdf:is:u")) != -1) { 73 const char *errstr; 74 75 switch (ch) { 76 case 'c': 77 cflag = 1; 78 break; 79 case 'd': 80 dflag = 1; 81 break; 82 case 'f': 83 numfields = (int)strtonum(optarg, 0, INT_MAX, 84 &errstr); 85 if (errstr) 86 errx(1, "field skip value is %s: %s", 87 errstr, optarg); 88 break; 89 case 'i': 90 iflag = 1; 91 break; 92 case 's': 93 numchars = (int)strtonum(optarg, 0, INT_MAX, 94 &errstr); 95 if (errstr) 96 errx(1, 97 "character skip value is %s: %s", 98 errstr, optarg); 99 break; 100 case 'u': 101 uflag = 1; 102 break; 103 default: 104 usage(); 105 } 106 } 107 108 argc -= optind; 109 argv += optind; 110 111 /* If neither -d nor -u are set, default is -d -u. */ 112 if (!dflag && !uflag) 113 dflag = uflag = 1; 114 115 switch (argc) { 116 case 0: 117 ifp = stdin; 118 ofp = stdout; 119 break; 120 case 1: 121 ifp = file(argv[0], "r"); 122 ofp = stdout; 123 break; 124 case 2: 125 ifp = file(argv[0], "r"); 126 ofp = file(argv[1], "w"); 127 break; 128 default: 129 usage(); 130 } 131 132 if (pledge("stdio", NULL) == -1) 133 err(1, "pledge"); 134 135 prevsize = 0; 136 prevline = NULL; 137 if ((len = getline(&prevline, &prevsize, ifp)) == -1) { 138 free(prevline); 139 if (ferror(ifp)) 140 err(1, "getline"); 141 exit(0); 142 } 143 if (prevline[len - 1] == '\n') 144 prevline[len - 1] = '\0'; 145 146 thissize = 0; 147 thisline = NULL; 148 while ((len = getline(&thisline, &thissize, ifp)) != -1) { 149 if (thisline[len - 1] == '\n') 150 thisline[len - 1] = '\0'; 151 152 /* If requested get the chosen fields + character offsets. */ 153 if (numfields || numchars) { 154 t1 = skip(thisline); 155 t2 = skip(prevline); 156 } else { 157 t1 = thisline; 158 t2 = prevline; 159 } 160 161 /* If different, print; set previous to new value. */ 162 if ((iflag ? strcasecmp : strcmp)(t1, t2)) { 163 show(ofp, prevline); 164 t1 = prevline; 165 prevline = thisline; 166 thisline = t1; 167 tmpsize = prevsize; 168 prevsize = thissize; 169 thissize = tmpsize; 170 repeats = 0; 171 } else 172 ++repeats; 173 } 174 free(thisline); 175 if (ferror(ifp)) 176 err(1, "getline"); 177 178 show(ofp, prevline); 179 free(prevline); 180 181 exit(0); 182 } 183 184 /* 185 * show -- 186 * Output a line depending on the flags and number of repetitions 187 * of the line. 188 */ 189 void 190 show(FILE *ofp, char *str) 191 { 192 if ((dflag && repeats) || (uflag && !repeats)) { 193 if (cflag) 194 fprintf(ofp, "%4d %s\n", repeats + 1, str); 195 else 196 fprintf(ofp, "%s\n", str); 197 } 198 } 199 200 char * 201 skip(char *str) 202 { 203 wchar_t wc; 204 int nchars, nfields; 205 int len; 206 int field_started; 207 208 for (nfields = numfields; nfields && *str; nfields--) { 209 /* Skip one field, including preceding blanks. */ 210 for (field_started = 0; *str != '\0'; str += len) { 211 if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) { 212 (void)mbtowc(NULL, NULL, MB_CUR_MAX); 213 wc = L'?'; 214 len = 1; 215 } 216 if (iswblank(wc)) { 217 if (field_started) 218 break; 219 } else 220 field_started = 1; 221 } 222 } 223 224 /* Skip some additional characters. */ 225 for (nchars = numchars; nchars-- && *str != '\0'; str += len) 226 if ((len = mblen(str, MB_CUR_MAX)) == -1) 227 len = 1; 228 229 return (str); 230 } 231 232 FILE * 233 file(char *name, char *mode) 234 { 235 FILE *fp; 236 237 if (strcmp(name, "-") == 0) 238 return(*mode == 'r' ? stdin : stdout); 239 if ((fp = fopen(name, mode)) == NULL) 240 err(1, "%s", name); 241 return (fp); 242 } 243 244 void 245 obsolete(char *argv[]) 246 { 247 size_t len; 248 char *ap, *p, *start; 249 250 while ((ap = *++argv)) { 251 /* Return if "--" or not an option of any form. */ 252 if (ap[0] != '-') { 253 if (ap[0] != '+') 254 return; 255 } else if (ap[1] == '-') 256 return; 257 if (!isdigit((unsigned char)ap[1])) 258 continue; 259 /* 260 * Digit signifies an old-style option. Malloc space for dash, 261 * new option and argument. 262 */ 263 len = strlen(ap) + 3; 264 if ((start = p = malloc(len)) == NULL) 265 err(1, "malloc"); 266 *p++ = '-'; 267 *p++ = ap[0] == '+' ? 's' : 'f'; 268 (void)strlcpy(p, ap + 1, len - 2); 269 *argv = start; 270 } 271 } 272 273 __dead void 274 usage(void) 275 { 276 extern char *__progname; 277 278 (void)fprintf(stderr, 279 "usage: %s [-ci] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n", 280 __progname); 281 exit(1); 282 } 283