xref: /dragonfly/usr.bin/wc/wc.c (revision caa7a3ee)
1 /*
2  * Copyright (c) 1980, 1987, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#) Copyright (c) 1980, 1987, 1991, 1993 The Regents of the University of California.  All rights reserved.
30  * @(#)wc.c	8.1 (Berkeley) 6/6/93
31  * $FreeBSD: head/usr.bin/wc/wc.c 281617 2015-04-16 21:44:35Z bdrewery $
32  */
33 
34 #include <sys/param.h>
35 #include <sys/stat.h>
36 
37 #include <ctype.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <locale.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <wchar.h>
48 #include <wctype.h>
49 
/* Running totals over all inputs; main() prints them as the "total" line. */
static uintmax_t tlinect, twordct, tcharct, tlongline;
/* Output selectors, set in main() by -l, -w, -c, -m and -L respectively. */
static int doline, doword, dochar, domulti, dolongline;
/*
 * Set to 1 by siginfo_handler(); tested and cleared by show_cnt() and
 * cleared by reset_siginfo().
 */
static volatile sig_atomic_t siginfo;

static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
static int	cnt(const char *);
static void	usage(void);
58 
59 static void
60 siginfo_handler(int sig __unused)
61 {
62 
63 	siginfo = 1;
64 }
65 
66 static void
67 reset_siginfo(void)
68 {
69 
70 	signal(SIGINFO, SIG_DFL);
71 	siginfo = 0;
72 }
73 
74 int
75 main(int argc, char *argv[])
76 {
77 	int ch, errors, total;
78 
79 	setlocale(LC_CTYPE, "");
80 
81 	while ((ch = getopt(argc, argv, "clmwL")) != -1) {
82 		switch (ch) {
83 		case 'l':
84 			doline = 1;
85 			break;
86 		case 'w':
87 			doword = 1;
88 			break;
89 		case 'c':
90 			dochar = 1;
91 			domulti = 0;
92 			break;
93 		case 'L':
94 			dolongline = 1;
95 			break;
96 		case 'm':
97 			domulti = 1;
98 			dochar = 0;
99 			break;
100 		case '?':
101 		default:
102 			usage();
103 		}
104 	}
105 	argv += optind;
106 	argc -= optind;
107 
108 	signal(SIGINFO, siginfo_handler);
109 
110 	/* Wc's flags are on by default. */
111 	if (doline + doword + dochar + domulti + dolongline == 0)
112 		doline = doword = dochar = 1;
113 
114 	errors = 0;
115 	total = 0;
116 	if (*argv == NULL) {
117 		if (cnt(NULL) != 0)
118 			++errors;
119 	} else {
120 		do {
121 			if (cnt(*argv) != 0)
122 				++errors;
123 			++total;
124 		} while(*++argv);
125 	}
126 
127 	if (total > 1)
128 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
129 	exit(errors == 0 ? 0 : 1);
130 }
131 
132 static void
133 show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
134     uintmax_t charct, uintmax_t llct)
135 {
136 	FILE *out;
137 
138 	if (!siginfo)
139 		out = stdout;
140 	else {
141 		out = stderr;
142 		siginfo = 0;
143 	}
144 
145 	if (doline)
146 		fprintf(out, " %7ju", linect);
147 	if (doword)
148 		fprintf(out, " %7ju", wordct);
149 	if (dochar || domulti)
150 		fprintf(out, " %7ju", charct);
151 	if (dolongline)
152 		fprintf(out, " %7ju", llct);
153 	if (file != NULL)
154 		fprintf(out, " %s\n", file);
155 	else
156 		fprintf(out, "\n");
157 }
158 
159 static int
160 cnt(const char *file)
161 {
162 	struct stat sb;
163 	uintmax_t linect, wordct, charct, llct, tmpll;
164 	int fd, len, warned;
165 	size_t clen;
166 	short gotsp;
167 	u_char *p;
168 	u_char buf[MAXBSIZE];
169 	wchar_t wch;
170 	mbstate_t mbs;
171 
172 	linect = wordct = charct = llct = tmpll = 0;
173 	if (file == NULL) {
174 		fd = STDIN_FILENO;
175 	} else {
176 		if ((fd = open(file, O_RDONLY)) < 0) {
177 			warn("%s: open", file);
178 			return (1);
179 		}
180 		if (doword || (domulti && MB_CUR_MAX != 1))
181 			goto word;
182 		/*
183 		 * Line counting is split out because it's a lot faster to get
184 		 * lines than to get words, since the word count requires some
185 		 * logic.
186 		 */
187 		if (doline) {
188 			while ((len = read(fd, buf, MAXBSIZE))) {
189 				if (len == -1) {
190 					warn("%s: read", file);
191 					close(fd);
192 					return (1);
193 				}
194 				if (siginfo) {
195 					show_cnt(file, linect, wordct, charct,
196 					    llct);
197 				}
198 				charct += len;
199 				for (p = buf; len--; ++p) {
200 					if (*p == '\n') {
201 						if (tmpll > llct)
202 							llct = tmpll;
203 						tmpll = 0;
204 						++linect;
205 					} else
206 						tmpll++;
207 				}
208 			}
209 			reset_siginfo();
210 			tlinect += linect;
211 			if (dochar)
212 				tcharct += charct;
213 			if (dolongline) {
214 				if (llct > tlongline)
215 					tlongline = llct;
216 			}
217 			show_cnt(file, linect, wordct, charct, llct);
218 			close(fd);
219 			return (0);
220 		}
221 		/*
222 		 * If all we need is the number of characters and it's a
223 		 * regular file, just stat the puppy.
224 		 */
225 		if (dochar || domulti) {
226 			if (fstat(fd, &sb)) {
227 				warn("%s: fstat", file);
228 				close(fd);
229 				return (1);
230 			}
231 			if (S_ISREG(sb.st_mode)) {
232 				reset_siginfo();
233 				charct = sb.st_size;
234 				show_cnt(file, linect, wordct, charct, llct);
235 				tcharct += charct;
236 				close(fd);
237 				return (0);
238 			}
239 		}
240 	}
241 
242 	/* Do it the hard way... */
243 word:	gotsp = 1;
244 	warned = 0;
245 	memset(&mbs, 0, sizeof(mbs));
246 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
247 		if (len == -1) {
248 			warn("%s: read", file != NULL ? file : "stdin");
249 			close(fd);
250 			return (1);
251 		}
252 		p = buf;
253 		while (len > 0) {
254 			if (siginfo)
255 				show_cnt(file, linect, wordct, charct, llct);
256 			if (!domulti || MB_CUR_MAX == 1) {
257 				clen = 1;
258 				wch = (unsigned char)*p;
259 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
260 			    (size_t)-1) {
261 				if (!warned) {
262 					errno = EILSEQ;
263 					warn("%s",
264 					    file != NULL ? file : "stdin");
265 					warned = 1;
266 				}
267 				memset(&mbs, 0, sizeof(mbs));
268 				clen = 1;
269 				wch = (unsigned char)*p;
270 			} else if (clen == (size_t)-2)
271 				break;
272 			else if (clen == 0)
273 				clen = 1;
274 			charct++;
275 			if (wch != L'\n')
276 				tmpll++;
277 			len -= clen;
278 			p += clen;
279 			if (wch == L'\n') {
280 				if (tmpll > llct)
281 					llct = tmpll;
282 				tmpll = 0;
283 				++linect;
284 			}
285 			if (iswspace(wch))
286 				gotsp = 1;
287 			else if (gotsp) {
288 				gotsp = 0;
289 				++wordct;
290 			}
291 		}
292 	}
293 	reset_siginfo();
294 	if (domulti && MB_CUR_MAX > 1)
295 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
296 			warn("%s", file != NULL ? file : "stdin");
297 	if (doline)
298 		tlinect += linect;
299 	if (doword)
300 		twordct += wordct;
301 	if (dochar || domulti)
302 		tcharct += charct;
303 	if (dolongline) {
304 		if (llct > tlongline)
305 			tlongline = llct;
306 	}
307 	show_cnt(file, linect, wordct, charct, llct);
308 	close(fd);
309 	return (0);
310 }
311 
/* Write the command synopsis to stderr and terminate with exit status 1. */
static void
usage(void)
{
	fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
318