/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Case Larsen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved.
 * @(#)uniq.c	8.3 (Berkeley) 5/4/95
 * $FreeBSD: src/usr.bin/uniq/uniq.c,v 1.11.2.3 2002/06/28 08:02:19 tjr Exp $
 * $DragonFly: src/usr.bin/uniq/uniq.c,v 1.5 2008/10/16 01:52:34 swildner Exp $
 */

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Size of each line buffer; see get_line() for how longer lines are cut. */
#define	MAXLINELEN	(LINE_MAX + 1)

/* Output selection flags, set from -c, -d and -u. */
static int cflag, dflag, uflag;
/*
 * numchars/numfields: leading characters/fields excluded from comparison
 * (-s / -f).  repeats: extra occurrences of the current line seen so far.
 */
static int numchars, numfields, repeats;

static FILE	*file(const char *, const char *);
static char	*get_line(char *, size_t, FILE *);
static void	 lowercopy(char *, size_t, const char *);
static void	 obsolete(char *[]);
static int	 parse_count(const char *, const char *);
static void	 show(FILE *, const char *);
static char	*skip(char *);
static int	 stricoll(const char *, const char *);
static void	 usage(void);

/*
 * main --
 *	Parse the options, open the input/output streams and walk the input
 *	one line at a time, comparing each line with the previous one.  A
 *	run of equal lines is reported once, through show(), when the run
 *	ends.  Exits 0 on success and 1 on usage, open, read or write errors.
 */
int
main(int argc, char **argv)
{
	const char *ifn, *ofn;
	char *prevline, *thisline, *t1, *t2;
	FILE *ifp, *ofp;
	int ch, comp;
	int iflag = 0;

	(void) setlocale(LC_ALL, "");

	/* Rewrite old-style -N / +N arguments before getopt() sees them. */
	obsolete(argv);
	while ((ch = getopt(argc, argv, "cdif:s:u")) != -1)
		switch (ch) {
		case 'c':
			cflag = 1;
			break;
		case 'd':
			dflag = 1;
			break;
		case 'i':
			iflag = 1;
			break;
		case 'f':
			numfields = parse_count(optarg, "field");
			break;
		case 's':
			numchars = parse_count(optarg, "character");
			break;
		case 'u':
			uflag = 1;
			break;
		case '?':
		default:
			usage();
		}

	argc -= optind;
	argv += optind;

	/* If no flags are set, default is -d -u. */
	if (cflag) {
		if (dflag || uflag)
			usage();
	} else if (!dflag && !uflag)
		dflag = uflag = 1;

	if (argc > 2)
		usage();

	ifp = stdin;
	ifn = "stdin";
	ofp = stdout;
	ofn = "stdout";
	if (argc > 0 && strcmp(argv[0], "-") != 0) {
		ifn = argv[0];
		ifp = file(ifn, "r");
	}
	if (argc > 1) {
		ofn = argv[1];
		ofp = file(ofn, "w");
	}

	prevline = malloc(MAXLINELEN);
	thisline = malloc(MAXLINELEN);
	if (prevline == NULL || thisline == NULL)
		errx(1, "malloc");

	/* Empty input produces no output at all. */
	if (get_line(prevline, MAXLINELEN, ifp) != NULL) {
		while (get_line(thisline, MAXLINELEN, ifp) != NULL) {
			/* If requested get the chosen fields + character offsets. */
			if (numfields || numchars) {
				t1 = skip(thisline);
				t2 = skip(prevline);
			} else {
				t1 = thisline;
				t2 = prevline;
			}

			/* If different, print; set previous to new value. */
			if (iflag)
				comp = stricoll(t1, t2);
			else
				comp = strcoll(t1, t2);

			if (comp) {
				show(ofp, prevline);
				/* Swap the buffers instead of copying the line. */
				t1 = prevline;
				prevline = thisline;
				thisline = t1;
				repeats = 0;
			} else
				++repeats;
		}
		/* Flush the final run, which no later line terminated. */
		show(ofp, prevline);
	}

	/* Report I/O failures instead of exiting 0 with truncated output. */
	if (ferror(ifp))
		err(1, "%s", ifn);
	if (ferror(ofp) || fflush(ofp) == EOF)
		err(1, "%s", ofn);
	exit(0);
}

/*
 * parse_count --
 *	Convert the argument of -f or -s to a non-negative int.  `what' is
 *	the word inserted in the diagnostic ("field" or "character").
 *	Empty strings, trailing garbage, negative values and values that do
 *	not fit in an int are rejected with errx(3).
 */
static int
parse_count(const char *arg, const char *what)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val < 0 || val > INT_MAX)
		errx(1, "illegal %s skip value: %s", what, arg);
	return ((int)val);
}

/*
 * get_line --
 *	Read one line from fp into buf (buflen bytes), without the newline.
 *	At most buflen - 2 characters are stored; the remainder of a longer
 *	line is read and discarded.  Returns buf, or NULL when end of file
 *	(or a read error) is reached before any character of a line is read.
 *
 *	Deliberately not named getline: POSIX.1-2008 <stdio.h> declares a
 *	getline() with a different prototype.
 */
static char *
get_line(char *buf, size_t buflen, FILE *fp)
{
	size_t bufpos;
	int ch = EOF;

	bufpos = 0;
	/* `<' rather than `!=' so a tiny buflen cannot run past the buffer. */
	while (bufpos + 2 < buflen && (ch = getc(fp)) != EOF && ch != '\n')
		buf[bufpos++] = ch;
	if (bufpos < buflen)
		buf[bufpos] = '\0';
	/* Line was cut short: consume what is left of it. */
	while (ch != EOF && ch != '\n')
		ch = getc(fp);

	return (bufpos != 0 || ch == '\n' ? buf : NULL);
}

/*
 * show --
 *	Output a line depending on the flags and number of repetitions
 *	of the line.  With -c every line, including an empty one, is
 *	written preceded by its occurrence count.
 */
static void
show(FILE *ofp, const char *str)
{

	if (cflag)
		(void)fprintf(ofp, "%4d %s\n", repeats + 1, str);
	if ((dflag && repeats) || (uflag && !repeats))
		(void)fprintf(ofp, "%s\n", str);
}

/*
 * skip --
 *	Return a pointer into str just past the first numfields fields (a
 *	field being optional leading blanks followed by non-blanks) and then
 *	past a further numchars characters.  Never moves beyond the
 *	terminating NUL.
 */
static char *
skip(char *str)
{
	int nchars, nfields;

	for (nfields = 0; *str != '\0' && nfields++ != numfields; ) {
		while (isblank((unsigned char)*str))
			str++;
		while (*str != '\0' && !isblank((unsigned char)*str))
			str++;
	}
	for (nchars = numchars; nchars-- && *str; ++str)
		continue;
	return (str);
}

/*
 * file --
 *	fopen(3) wrapper that exits through err(3) when the open fails.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *fp;

	if ((fp = fopen(name, mode)) == NULL)
		err(1, "%s", name);
	return (fp);
}

/*
 * obsolete --
 *	Rewrite old-style arguments in place: "-N" becomes "-fN" and "+N"
 *	becomes "-sN".  Scanning stops at "--" or at the first argument that
 *	starts with neither '-' nor '+'.  Replacement strings are malloc'ed
 *	and never freed.
 */
static void
obsolete(char **argv)
{
	size_t len;
	char *ap, *p, *start;

	/* Nothing to scan if even the program name is missing. */
	if (*argv == NULL)
		return;

	while ((ap = *++argv)) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			errx(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}

/*
 * usage --
 *	Print the synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	(void)fprintf(stderr,
	    "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
	exit(1);
}

/*
 * lowercopy --
 *	Copy src to dst, converting each character with tolower(3).  At most
 *	dstlen - 1 characters are copied and dst is always NUL-terminated;
 *	dstlen must be at least 1.
 */
static void
lowercopy(char *dst, size_t dstlen, const char *src)
{
	size_t i;

	for (i = 0; i + 1 < dstlen && src[i] != '\0'; i++)
		dst[i] = tolower((unsigned char)src[i]);
	dst[i] = '\0';
}

/*
 * stricoll --
 *	Case-insensitive strcoll(3): compare lower-cased copies of s1 and
 *	s2.  The copies live in MAXLINELEN-byte buffers, so longer input is
 *	compared on its first MAXLINELEN - 1 characters only.
 */
static int
stricoll(const char *s1, const char *s2)
{
	char line1[MAXLINELEN], line2[MAXLINELEN];

	lowercopy(line1, sizeof(line1), s1);
	lowercopy(line2, sizeof(line2), s2);
	return (strcoll(line1, line2));
}