xref: /openbsd/usr.bin/uniq/uniq.c (revision 404b540a)
1 /*	$OpenBSD: uniq.c,v 1.17 2007/11/11 17:50:29 kili Exp $	*/
2 /*	$NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Case Larsen.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 static char copyright[] =
38 "@(#) Copyright (c) 1989, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
45 #endif
46 static char rcsid[] = "$OpenBSD: uniq.c,v 1.17 2007/11/11 17:50:29 kili Exp $";
47 #endif /* not lint */
48 
49 #include <ctype.h>
50 #include <err.h>
51 #include <errno.h>
52 #include <limits.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <unistd.h>
57 
58 #define	MAXLINELEN	(8 * 1024)
59 
/*
 * Option flags set from the command line: -c (prefix each printed line with
 * its occurrence count), -d (print lines that were repeated), -u (print
 * lines that were not repeated).
 */
int cflag, dflag, uflag;
/*
 * numchars and numfields hold the -s and -f skip counts.  repeats is the
 * number of additional consecutive occurrences seen of the line currently
 * held as "previous"; it is reset to 0 whenever a different line arrives.
 */
int numchars, numfields, repeats;

FILE	*file(char *, char *);	/* open a named file; "-" selects stdin/stdout */
void	 show(FILE *, char *);	/* print a line if the flags and repeats allow */
char	*skip(char *);		/* step past the skipped fields and characters */
void	 obsolete(char *[]);	/* rewrite old-style -N and +N arguments */
__dead void	usage(void);	/* print the usage message and exit */
68 
69 int
70 main(int argc, char *argv[])
71 {
72 	char *t1, *t2;
73 	FILE *ifp = NULL, *ofp = NULL;
74 	int ch;
75 	char *prevline, *thisline;
76 
77 	obsolete(argv);
78 	while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) {
79 		const char *errstr;
80 
81 		switch (ch) {
82 		case 'c':
83 			cflag = 1;
84 			break;
85 		case 'd':
86 			dflag = 1;
87 			break;
88 		case 'f':
89 			numfields = (int)strtonum(optarg, 0, INT_MAX,
90 			    &errstr);
91 			if (errstr)
92 				errx(1, "field skip value is %s: %s",
93 				    errstr, optarg);
94 			break;
95 		case 's':
96 			numchars = (int)strtonum(optarg, 0, INT_MAX,
97 			    &errstr);
98 			if (errstr)
99 				errx(1,
100 				    "character skip value is %s: %s",
101 				    errstr, optarg);
102 			break;
103 		case 'u':
104 			uflag = 1;
105 			break;
106 		default:
107 			usage();
108 		}
109 	}
110 
111 	argc -= optind;
112 	argv += optind;
113 
114 	/* If neither -d nor -u are set, default is -d -u. */
115 	if (!dflag && !uflag)
116 		dflag = uflag = 1;
117 
118 	switch(argc) {
119 	case 0:
120 		ifp = stdin;
121 		ofp = stdout;
122 		break;
123 	case 1:
124 		ifp = file(argv[0], "r");
125 		ofp = stdout;
126 		break;
127 	case 2:
128 		ifp = file(argv[0], "r");
129 		ofp = file(argv[1], "w");
130 		break;
131 	default:
132 		usage();
133 	}
134 
135 	prevline = malloc(MAXLINELEN);
136 	thisline = malloc(MAXLINELEN);
137 	if (prevline == NULL || thisline == NULL)
138 		err(1, "malloc");
139 
140 	if (fgets(prevline, MAXLINELEN, ifp) == NULL)
141 		exit(0);
142 
143 	while (fgets(thisline, MAXLINELEN, ifp)) {
144 		/* If requested get the chosen fields + character offsets. */
145 		if (numfields || numchars) {
146 			t1 = skip(thisline);
147 			t2 = skip(prevline);
148 		} else {
149 			t1 = thisline;
150 			t2 = prevline;
151 		}
152 
153 		/* If different, print; set previous to new value. */
154 		if (strcmp(t1, t2)) {
155 			show(ofp, prevline);
156 			t1 = prevline;
157 			prevline = thisline;
158 			thisline = t1;
159 			repeats = 0;
160 		} else
161 			++repeats;
162 	}
163 	show(ofp, prevline);
164 	exit(0);
165 }
166 
167 /*
168  * show --
169  *	Output a line depending on the flags and number of repetitions
170  *	of the line.
171  */
172 void
173 show(FILE *ofp, char *str)
174 {
175 	if ((dflag && repeats) || (uflag && !repeats)) {
176 		if (cflag)
177 			(void)fprintf(ofp, "%4d %s", repeats + 1, str);
178 		else
179 			(void)fprintf(ofp, "%s", str);
180 	}
181 }
182 
183 char *
184 skip(char *str)
185 {
186 	int nchars, nfields;
187 
188 	for (nfields = numfields; nfields && *str; nfields--) {
189 		while (isblank(*str))
190 			str++;
191 		while (*str && !isblank(*str))
192 			str++;
193 	}
194 	for (nchars = numchars; nchars-- && *str && *str != '\n'; ++str)
195 		;
196 	return (str);
197 }
198 
/*
 * file --
 *	Open `name' with the given fopen(3) mode and return the stream.
 *	The name "-" is not opened: it yields stdin when the mode starts
 *	with 'r' and stdout otherwise.  A failed open terminates the program
 *	through err(3) with exit status 1.
 */
FILE *
file(char *name, char *mode)
{
	FILE *stream;

	if (strcmp(name, "-") == 0) {
		if (*mode == 'r')
			return (stdin);
		return (stdout);
	}

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
210 
/*
 * obsolete --
 *	Rewrite historic option syntax in place before getopt(3) runs:
 *	"-N" becomes "-fN" (skip N fields) and "+N" becomes "-sN" (skip N
 *	characters).  Scanning starts at argv[1] and stops at the end of the
 *	vector, at "--", or at the first argument that does not begin with
 *	'-' or '+'.  Replacement strings are allocated with malloc(3) and are
 *	never freed; allocation failure terminates the program through err(3).
 */
void
obsolete(char *argv[])
{
	size_t rest;
	char *ap, *start;

	while ((ap = *++argv)) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		/*
		 * isdigit() is only defined for EOF and unsigned char values;
		 * a plain char may be negative for bytes >= 0x80.
		 */
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option letter, the argument and its terminating NUL.
		 */
		rest = strlen(ap + 1);
		if ((start = malloc(rest + 3)) == NULL)
			err(1, "malloc");
		start[0] = '-';
		start[1] = ap[0] == '+' ? 's' : 'f';
		/* The length is known, so copy it together with the NUL. */
		memcpy(start + 2, ap + 1, rest + 1);
		*argv = start;
	}
}
239 
240 __dead void
241 usage(void)
242 {
243 	extern char *__progname;
244 
245 	(void)fprintf(stderr,
246 	    "usage: %s [-c] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n",
247 	    __progname);
248 	exit(1);
249 }
250