1 /* $OpenBSD: uniq.c,v 1.33 2022/01/01 18:20:52 cheloha Exp $ */
2 /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */
3
4 /*
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Case Larsen.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <ctype.h>
37 #include <err.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <strings.h>
44 #include <unistd.h>
45 #include <wchar.h>
46 #include <wctype.h>
47
/* Values of -s (characters to skip) and -f (fields to skip); 0 if not given. */
long long numchars, numfields;
/* Additional occurrences seen of the line currently held as "previous". */
unsigned long long repeats;
/* Set for -c, -d, -i and -u; main() turns on both -d and -u if neither is given. */
int cflag, dflag, iflag, uflag;

void show(const char *);
char *skip(char *);
void obsolete(char *[]);
__dead void usage(void);
56
57 int
main(int argc,char * argv[])58 main(int argc, char *argv[])
59 {
60 const char *errstr;
61 char *p, *prevline, *t, *thisline, *tmp;
62 size_t prevsize, thissize, tmpsize;
63 ssize_t len;
64 int ch;
65
66 setlocale(LC_CTYPE, "");
67
68 if (pledge("stdio rpath wpath cpath", NULL) == -1)
69 err(1, "pledge");
70
71 obsolete(argv);
72 while ((ch = getopt(argc, argv, "cdf:is:u")) != -1) {
73 switch (ch) {
74 case 'c':
75 cflag = 1;
76 break;
77 case 'd':
78 dflag = 1;
79 break;
80 case 'f':
81 numfields = strtonum(optarg, 0, LLONG_MAX, &errstr);
82 if (errstr)
83 errx(1, "fields is %s: %s", errstr, optarg);
84 break;
85 case 'i':
86 iflag = 1;
87 break;
88 case 's':
89 numchars = strtonum(optarg, 0, LLONG_MAX, &errstr);
90 if (errstr)
91 errx(1, "chars is %s: %s", errstr, optarg);
92 break;
93 case 'u':
94 uflag = 1;
95 break;
96 default:
97 usage();
98 }
99 }
100 argc -= optind;
101 argv += optind;
102
103 /* If neither -d nor -u are set, default is -d -u. */
104 if (!dflag && !uflag)
105 dflag = uflag = 1;
106
107 if (argc > 2)
108 usage();
109 if (argc >= 1 && strcmp(argv[0], "-") != 0) {
110 if (freopen(argv[0], "r", stdin) == NULL)
111 err(1, "%s", argv[0]);
112 }
113 if (argc == 2 && strcmp(argv[1], "-") != 0) {
114 if (freopen(argv[1], "w", stdout) == NULL)
115 err(1, "%s", argv[1]);
116 }
117
118 if (pledge("stdio", NULL) == -1)
119 err(1, "pledge");
120
121 prevsize = 0;
122 prevline = NULL;
123 if ((len = getline(&prevline, &prevsize, stdin)) == -1) {
124 free(prevline);
125 if (ferror(stdin))
126 err(1, "getline");
127 return 0;
128 }
129 if (prevline[len - 1] == '\n')
130 prevline[len - 1] = '\0';
131 if (numfields || numchars)
132 p = skip(prevline);
133 else
134 p = prevline;
135
136 thissize = 0;
137 thisline = NULL;
138 while ((len = getline(&thisline, &thissize, stdin)) != -1) {
139 if (thisline[len - 1] == '\n')
140 thisline[len - 1] = '\0';
141
142 /* If requested get the chosen fields + character offsets. */
143 if (numfields || numchars)
144 t = skip(thisline);
145 else
146 t = thisline;
147
148 /* If different, print; set previous to new value. */
149 if ((iflag ? strcasecmp : strcmp)(p, t)) {
150 show(prevline);
151 tmp = prevline;
152 prevline = thisline;
153 thisline = tmp;
154 tmp = p;
155 p = t;
156 t = tmp;
157 tmpsize = prevsize;
158 prevsize = thissize;
159 thissize = tmpsize;
160 repeats = 0;
161 } else
162 ++repeats;
163 }
164 free(thisline);
165 if (ferror(stdin))
166 err(1, "getline");
167
168 show(prevline);
169 free(prevline);
170
171 return 0;
172 }
173
174 /*
175 * show --
176 * Output a line depending on the flags and number of repetitions
177 * of the line.
178 */
179 void
show(const char * str)180 show(const char *str)
181 {
182 if ((dflag && repeats) || (uflag && !repeats)) {
183 if (cflag)
184 printf("%4llu %s\n", repeats + 1, str);
185 else
186 printf("%s\n", str);
187 }
188 }
189
190 char *
skip(char * str)191 skip(char *str)
192 {
193 long long nchars, nfields;
194 wchar_t wc;
195 int len;
196 int field_started;
197
198 for (nfields = numfields; nfields && *str; nfields--) {
199 /* Skip one field, including preceding blanks. */
200 for (field_started = 0; *str != '\0'; str += len) {
201 if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) {
202 (void)mbtowc(NULL, NULL, MB_CUR_MAX);
203 wc = L'?';
204 len = 1;
205 }
206 if (iswblank(wc)) {
207 if (field_started)
208 break;
209 } else
210 field_started = 1;
211 }
212 }
213
214 /* Skip some additional characters. */
215 for (nchars = numchars; nchars-- && *str != '\0'; str += len)
216 if ((len = mblen(str, MB_CUR_MAX)) == -1)
217 len = 1;
218
219 return (str);
220 }
221
/*
 * obsolete --
 *	Translate historic option syntax in place so that getopt() can
 *	parse it: an argument of the form -N becomes -fN and +N becomes -sN.
 *	Scanning starts at argv[1] and stops at the first argument that
 *	begins with "--" or with neither '-' nor '+'.  Each rewritten
 *	argument is stored in newly allocated memory that this function
 *	does not free.
 */
void
obsolete(char *argv[])
{
	char **argp;
	char *arg, *buf;
	size_t buflen;

	for (argp = argv + 1; (arg = *argp) != NULL; argp++) {
		/* An argument that is not an option of any form ends the scan. */
		if (arg[0] != '-' && arg[0] != '+')
			return;
		/* So does "--". */
		if (arg[0] == '-' && arg[1] == '-')
			return;
		/* Only a digit right after the sign marks an old-style option. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/* Room for the dash, the new option letter and the argument. */
		buflen = strlen(arg) + 3;
		buf = malloc(buflen);
		if (buf == NULL)
			err(1, "malloc");
		buf[0] = '-';
		buf[1] = (arg[0] == '+') ? 's' : 'f';
		/* strlen(arg) bytes: everything after the sign plus the NUL. */
		memcpy(buf + 2, arg + 1, strlen(arg));
		*argp = buf;
	}
}
250
251 __dead void
usage(void)252 usage(void)
253 {
254 fprintf(stderr,
255 "usage: %s [-ci] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n",
256 getprogname());
257 exit(1);
258 }
259