1 /* $OpenBSD: uniq.c,v 1.33 2022/01/01 18:20:52 cheloha Exp $ */ 2 /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Case Larsen. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <strings.h> 44 #include <unistd.h> 45 #include <wchar.h> 46 #include <wctype.h> 47 48 long long numchars, numfields; 49 unsigned long long repeats; 50 int cflag, dflag, iflag, uflag; 51 52 void show(const char *); 53 char *skip(char *); 54 void obsolete(char *[]); 55 __dead void usage(void); 56 57 int 58 main(int argc, char *argv[]) 59 { 60 const char *errstr; 61 char *p, *prevline, *t, *thisline, *tmp; 62 size_t prevsize, thissize, tmpsize; 63 ssize_t len; 64 int ch; 65 66 setlocale(LC_CTYPE, ""); 67 68 if (pledge("stdio rpath wpath cpath", NULL) == -1) 69 err(1, "pledge"); 70 71 obsolete(argv); 72 while ((ch = getopt(argc, argv, "cdf:is:u")) != -1) { 73 switch (ch) { 74 case 'c': 75 cflag = 1; 76 break; 77 case 'd': 78 dflag = 1; 79 break; 80 case 'f': 81 numfields = strtonum(optarg, 0, LLONG_MAX, &errstr); 82 if (errstr) 83 errx(1, "fields is %s: %s", errstr, optarg); 84 break; 85 case 'i': 86 iflag = 1; 87 break; 88 case 's': 89 numchars = strtonum(optarg, 0, LLONG_MAX, &errstr); 90 if (errstr) 91 errx(1, "chars is %s: %s", errstr, optarg); 92 break; 93 case 'u': 94 uflag = 1; 95 break; 96 default: 97 usage(); 98 } 99 } 100 argc -= optind; 101 argv += optind; 102 103 /* If neither -d nor -u are set, default is -d -u. */ 104 if (!dflag && !uflag) 105 dflag = uflag = 1; 106 107 if (argc > 2) 108 usage(); 109 if (argc >= 1 && strcmp(argv[0], "-") != 0) { 110 if (freopen(argv[0], "r", stdin) == NULL) 111 err(1, "%s", argv[0]); 112 } 113 if (argc == 2 && strcmp(argv[1], "-") != 0) { 114 if (freopen(argv[1], "w", stdout) == NULL) 115 err(1, "%s", argv[1]); 116 } 117 118 if (pledge("stdio", NULL) == -1) 119 err(1, "pledge"); 120 121 prevsize = 0; 122 prevline = NULL; 123 if ((len = getline(&prevline, &prevsize, stdin)) == -1) { 124 free(prevline); 125 if (ferror(stdin)) 126 err(1, "getline"); 127 return 0; 128 } 129 if (prevline[len - 1] == '\n') 130 prevline[len - 1] = '\0'; 131 if (numfields || numchars) 132 p = skip(prevline); 133 else 134 p = prevline; 135 136 thissize = 0; 137 thisline = NULL; 138 while ((len = getline(&thisline, &thissize, stdin)) != -1) { 139 if (thisline[len - 1] == '\n') 140 thisline[len - 1] = '\0'; 141 142 /* If requested get the chosen fields + character offsets. */ 143 if (numfields || numchars) 144 t = skip(thisline); 145 else 146 t = thisline; 147 148 /* If different, print; set previous to new value. */ 149 if ((iflag ? strcasecmp : strcmp)(p, t)) { 150 show(prevline); 151 tmp = prevline; 152 prevline = thisline; 153 thisline = tmp; 154 tmp = p; 155 p = t; 156 t = tmp; 157 tmpsize = prevsize; 158 prevsize = thissize; 159 thissize = tmpsize; 160 repeats = 0; 161 } else 162 ++repeats; 163 } 164 free(thisline); 165 if (ferror(stdin)) 166 err(1, "getline"); 167 168 show(prevline); 169 free(prevline); 170 171 return 0; 172 } 173 174 /* 175 * show -- 176 * Output a line depending on the flags and number of repetitions 177 * of the line. 178 */ 179 void 180 show(const char *str) 181 { 182 if ((dflag && repeats) || (uflag && !repeats)) { 183 if (cflag) 184 printf("%4llu %s\n", repeats + 1, str); 185 else 186 printf("%s\n", str); 187 } 188 } 189 190 char * 191 skip(char *str) 192 { 193 long long nchars, nfields; 194 wchar_t wc; 195 int len; 196 int field_started; 197 198 for (nfields = numfields; nfields && *str; nfields--) { 199 /* Skip one field, including preceding blanks. */ 200 for (field_started = 0; *str != '\0'; str += len) { 201 if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) { 202 (void)mbtowc(NULL, NULL, MB_CUR_MAX); 203 wc = L'?'; 204 len = 1; 205 } 206 if (iswblank(wc)) { 207 if (field_started) 208 break; 209 } else 210 field_started = 1; 211 } 212 } 213 214 /* Skip some additional characters. */ 215 for (nchars = numchars; nchars-- && *str != '\0'; str += len) 216 if ((len = mblen(str, MB_CUR_MAX)) == -1) 217 len = 1; 218 219 return (str); 220 } 221 222 void 223 obsolete(char *argv[]) 224 { 225 size_t len; 226 char *ap, *p, *start; 227 228 while ((ap = *++argv)) { 229 /* Return if "--" or not an option of any form. */ 230 if (ap[0] != '-') { 231 if (ap[0] != '+') 232 return; 233 } else if (ap[1] == '-') 234 return; 235 if (!isdigit((unsigned char)ap[1])) 236 continue; 237 /* 238 * Digit signifies an old-style option. Malloc space for dash, 239 * new option and argument. 240 */ 241 len = strlen(ap) + 3; 242 if ((start = p = malloc(len)) == NULL) 243 err(1, "malloc"); 244 *p++ = '-'; 245 *p++ = ap[0] == '+' ? 's' : 'f'; 246 (void)strlcpy(p, ap + 1, len - 2); 247 *argv = start; 248 } 249 } 250 251 __dead void 252 usage(void) 253 { 254 fprintf(stderr, 255 "usage: %s [-ci] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n", 256 getprogname()); 257 exit(1); 258 } 259