1 /* $OpenBSD: uniq.c,v 1.18 2009/10/27 23:59:46 deraadt Exp $ */ 2 /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Case Larsen. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 45 #define MAXLINELEN (8 * 1024) 46 47 int cflag, dflag, uflag; 48 int numchars, numfields, repeats; 49 50 FILE *file(char *, char *); 51 void show(FILE *, char *); 52 char *skip(char *); 53 void obsolete(char *[]); 54 __dead void usage(void); 55 56 int 57 main(int argc, char *argv[]) 58 { 59 char *t1, *t2; 60 FILE *ifp = NULL, *ofp = NULL; 61 int ch; 62 char *prevline, *thisline; 63 64 obsolete(argv); 65 while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) { 66 const char *errstr; 67 68 switch (ch) { 69 case 'c': 70 cflag = 1; 71 break; 72 case 'd': 73 dflag = 1; 74 break; 75 case 'f': 76 numfields = (int)strtonum(optarg, 0, INT_MAX, 77 &errstr); 78 if (errstr) 79 errx(1, "field skip value is %s: %s", 80 errstr, optarg); 81 break; 82 case 's': 83 numchars = (int)strtonum(optarg, 0, INT_MAX, 84 &errstr); 85 if (errstr) 86 errx(1, 87 "character skip value is %s: %s", 88 errstr, optarg); 89 break; 90 case 'u': 91 uflag = 1; 92 break; 93 default: 94 usage(); 95 } 96 } 97 98 argc -= optind; 99 argv += optind; 100 101 /* If neither -d nor -u are set, default is -d -u. */ 102 if (!dflag && !uflag) 103 dflag = uflag = 1; 104 105 switch(argc) { 106 case 0: 107 ifp = stdin; 108 ofp = stdout; 109 break; 110 case 1: 111 ifp = file(argv[0], "r"); 112 ofp = stdout; 113 break; 114 case 2: 115 ifp = file(argv[0], "r"); 116 ofp = file(argv[1], "w"); 117 break; 118 default: 119 usage(); 120 } 121 122 prevline = malloc(MAXLINELEN); 123 thisline = malloc(MAXLINELEN); 124 if (prevline == NULL || thisline == NULL) 125 err(1, "malloc"); 126 127 if (fgets(prevline, MAXLINELEN, ifp) == NULL) 128 exit(0); 129 130 while (fgets(thisline, MAXLINELEN, ifp)) { 131 /* If requested get the chosen fields + character offsets. */ 132 if (numfields || numchars) { 133 t1 = skip(thisline); 134 t2 = skip(prevline); 135 } else { 136 t1 = thisline; 137 t2 = prevline; 138 } 139 140 /* If different, print; set previous to new value. */ 141 if (strcmp(t1, t2)) { 142 show(ofp, prevline); 143 t1 = prevline; 144 prevline = thisline; 145 thisline = t1; 146 repeats = 0; 147 } else 148 ++repeats; 149 } 150 show(ofp, prevline); 151 exit(0); 152 } 153 154 /* 155 * show -- 156 * Output a line depending on the flags and number of repetitions 157 * of the line. 158 */ 159 void 160 show(FILE *ofp, char *str) 161 { 162 if ((dflag && repeats) || (uflag && !repeats)) { 163 if (cflag) 164 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 165 else 166 (void)fprintf(ofp, "%s", str); 167 } 168 } 169 170 char * 171 skip(char *str) 172 { 173 int nchars, nfields; 174 175 for (nfields = numfields; nfields && *str; nfields--) { 176 while (isblank(*str)) 177 str++; 178 while (*str && !isblank(*str)) 179 str++; 180 } 181 for (nchars = numchars; nchars-- && *str && *str != '\n'; ++str) 182 ; 183 return (str); 184 } 185 186 FILE * 187 file(char *name, char *mode) 188 { 189 FILE *fp; 190 191 if (strcmp(name, "-") == 0) 192 return(*mode == 'r' ? stdin : stdout); 193 if ((fp = fopen(name, mode)) == NULL) 194 err(1, "%s", name); 195 return (fp); 196 } 197 198 void 199 obsolete(char *argv[]) 200 { 201 size_t len; 202 char *ap, *p, *start; 203 204 while ((ap = *++argv)) { 205 /* Return if "--" or not an option of any form. */ 206 if (ap[0] != '-') { 207 if (ap[0] != '+') 208 return; 209 } else if (ap[1] == '-') 210 return; 211 if (!isdigit(ap[1])) 212 continue; 213 /* 214 * Digit signifies an old-style option. Malloc space for dash, 215 * new option and argument. 216 */ 217 len = strlen(ap) + 3; 218 if ((start = p = malloc(len)) == NULL) 219 err(1, "malloc"); 220 *p++ = '-'; 221 *p++ = ap[0] == '+' ? 's' : 'f'; 222 (void)strlcpy(p, ap + 1, len - 2); 223 *argv = start; 224 } 225 } 226 227 __dead void 228 usage(void) 229 { 230 extern char *__progname; 231 232 (void)fprintf(stderr, 233 "usage: %s [-c] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n", 234 __progname); 235 exit(1); 236 } 237