xref: /openbsd/usr.bin/uniq/uniq.c (revision 3d8817e4)
1 /*	$OpenBSD: uniq.c,v 1.18 2009/10/27 23:59:46 deraadt Exp $	*/
2 /*	$NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Case Larsen.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <errno.h>
39 #include <limits.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 #define	MAXLINELEN	(8 * 1024)
46 
47 int cflag, dflag, uflag;
48 int numchars, numfields, repeats;
49 
50 FILE	*file(char *, char *);
51 void	 show(FILE *, char *);
52 char	*skip(char *);
53 void	 obsolete(char *[]);
54 __dead void	usage(void);
55 
56 int
57 main(int argc, char *argv[])
58 {
59 	char *t1, *t2;
60 	FILE *ifp = NULL, *ofp = NULL;
61 	int ch;
62 	char *prevline, *thisline;
63 
64 	obsolete(argv);
65 	while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) {
66 		const char *errstr;
67 
68 		switch (ch) {
69 		case 'c':
70 			cflag = 1;
71 			break;
72 		case 'd':
73 			dflag = 1;
74 			break;
75 		case 'f':
76 			numfields = (int)strtonum(optarg, 0, INT_MAX,
77 			    &errstr);
78 			if (errstr)
79 				errx(1, "field skip value is %s: %s",
80 				    errstr, optarg);
81 			break;
82 		case 's':
83 			numchars = (int)strtonum(optarg, 0, INT_MAX,
84 			    &errstr);
85 			if (errstr)
86 				errx(1,
87 				    "character skip value is %s: %s",
88 				    errstr, optarg);
89 			break;
90 		case 'u':
91 			uflag = 1;
92 			break;
93 		default:
94 			usage();
95 		}
96 	}
97 
98 	argc -= optind;
99 	argv += optind;
100 
101 	/* If neither -d nor -u are set, default is -d -u. */
102 	if (!dflag && !uflag)
103 		dflag = uflag = 1;
104 
105 	switch(argc) {
106 	case 0:
107 		ifp = stdin;
108 		ofp = stdout;
109 		break;
110 	case 1:
111 		ifp = file(argv[0], "r");
112 		ofp = stdout;
113 		break;
114 	case 2:
115 		ifp = file(argv[0], "r");
116 		ofp = file(argv[1], "w");
117 		break;
118 	default:
119 		usage();
120 	}
121 
122 	prevline = malloc(MAXLINELEN);
123 	thisline = malloc(MAXLINELEN);
124 	if (prevline == NULL || thisline == NULL)
125 		err(1, "malloc");
126 
127 	if (fgets(prevline, MAXLINELEN, ifp) == NULL)
128 		exit(0);
129 
130 	while (fgets(thisline, MAXLINELEN, ifp)) {
131 		/* If requested get the chosen fields + character offsets. */
132 		if (numfields || numchars) {
133 			t1 = skip(thisline);
134 			t2 = skip(prevline);
135 		} else {
136 			t1 = thisline;
137 			t2 = prevline;
138 		}
139 
140 		/* If different, print; set previous to new value. */
141 		if (strcmp(t1, t2)) {
142 			show(ofp, prevline);
143 			t1 = prevline;
144 			prevline = thisline;
145 			thisline = t1;
146 			repeats = 0;
147 		} else
148 			++repeats;
149 	}
150 	show(ofp, prevline);
151 	exit(0);
152 }
153 
154 /*
155  * show --
156  *	Output a line depending on the flags and number of repetitions
157  *	of the line.
158  */
159 void
160 show(FILE *ofp, char *str)
161 {
162 	if ((dflag && repeats) || (uflag && !repeats)) {
163 		if (cflag)
164 			(void)fprintf(ofp, "%4d %s", repeats + 1, str);
165 		else
166 			(void)fprintf(ofp, "%s", str);
167 	}
168 }
169 
170 char *
171 skip(char *str)
172 {
173 	int nchars, nfields;
174 
175 	for (nfields = numfields; nfields && *str; nfields--) {
176 		while (isblank(*str))
177 			str++;
178 		while (*str && !isblank(*str))
179 			str++;
180 	}
181 	for (nchars = numchars; nchars-- && *str && *str != '\n'; ++str)
182 		;
183 	return (str);
184 }
185 
/*
 * file --
 *	Return a stream for name opened with mode.  The name "-" selects
 *	a standard stream instead: stdin when mode begins with 'r',
 *	stdout otherwise.  Exits through err() if fopen() fails.
 */
FILE *
file(char *name, char *mode)
{
	FILE *stream;

	if (strcmp(name, "-") != 0) {
		stream = fopen(name, mode);
		if (stream == NULL)
			err(1, "%s", name);
		return (stream);
	}

	if (*mode == 'r')
		return (stdin);
	return (stdout);
}
197 
/*
 * obsolete --
 *	Rewrite old-style numeric options in place so getopt() can parse
 *	them: "-N" becomes "-fN" and "+N" becomes "-sN".  argv[0] is
 *	skipped.  Scanning stops at "--" (or any argument starting with
 *	"--") and at the first argument that begins with neither '-' nor
 *	'+'.  Replacement strings are allocated with malloc() and are
 *	never freed; allocation failure exits through err().
 */
void
obsolete(char *argv[])
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv)) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		/*
		 * isdigit() needs a value representable as unsigned char;
		 * a plain char holding a byte >= 0x80 may be negative.
		 */
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  The new string is
		 * '-', the option letter, then everything after the
		 * leading sign: strlen(ap) - 1 characters plus the NUL,
		 * i.e. strlen(ap) + 2 bytes in total.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 2)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		/* Copies the digits and the terminating NUL. */
		memcpy(p, ap + 1, len);
		*argv = start;
	}
}
226 
227 __dead void
228 usage(void)
229 {
230 	extern char *__progname;
231 
232 	(void)fprintf(stderr,
233 	    "usage: %s [-c] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n",
234 	    __progname);
235 	exit(1);
236 }
237