xref: /dragonfly/usr.bin/wc/wc.c (revision 7485684f)
1 /*
2  * Copyright (c) 1980, 1987, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#) Copyright (c) 1980, 1987, 1991, 1993 The Regents of the University of California.  All rights reserved.
30  * @(#)wc.c	8.1 (Berkeley) 6/6/93
31  * $FreeBSD: head/usr.bin/wc/wc.c 281617 2015-04-16 21:44:35Z bdrewery $
32  */
33 
34 #include <sys/param.h>
35 #include <sys/stat.h>
36 
37 #include <ctype.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <locale.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <wchar.h>
48 #include <wctype.h>
49 
/*
 * Counters accumulated over every input and printed on the "total" line.
 * tlongline is not a sum: it holds the longest line seen in any input.
 */
static uintmax_t tlinect;
static uintmax_t twordct;
static uintmax_t tcharct;
static uintmax_t tlongline;

/* Option flags; each is non-zero when the matching count was requested. */
static int doline;		/* -l */
static int doword;		/* -w */
static int dochar;		/* -c */
static int domulti;		/* -m */
static int dolongline;		/* -L */

/* Set by the SIGINFO handler; consumed (and cleared) by show_cnt(). */
static volatile sig_atomic_t siginfo;

static void	usage(void);
static int	cnt(const char *);
static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
58 
#ifdef SIGINFO
/*
 * SIGINFO handler: only records that a progress report was requested.
 * The counting loops notice the flag and print the report themselves.
 */
static void
siginfo_handler(int sig)
{
	(void)sig;		/* the signal number is not needed */
	siginfo = 1;
}
#endif
66 
67 static void
68 reset_siginfo(void)
69 {
70 #ifdef SIGINFO
71 	signal(SIGINFO, SIG_DFL);
72 #endif
73 	siginfo = 0;
74 }
75 
76 int
77 main(int argc, char *argv[])
78 {
79 	int ch, errors, total;
80 
81 	setlocale(LC_CTYPE, "");
82 
83 	while ((ch = getopt(argc, argv, "clmwL")) != -1) {
84 		switch (ch) {
85 		case 'l':
86 			doline = 1;
87 			break;
88 		case 'w':
89 			doword = 1;
90 			break;
91 		case 'c':
92 			dochar = 1;
93 			domulti = 0;
94 			break;
95 		case 'L':
96 			dolongline = 1;
97 			break;
98 		case 'm':
99 			domulti = 1;
100 			dochar = 0;
101 			break;
102 		case '?':
103 		default:
104 			usage();
105 		}
106 	}
107 	argv += optind;
108 	argc -= optind;
109 
110 #ifdef SIGINFO
111 	signal(SIGINFO, siginfo_handler);
112 #endif
113 
114 	/* Wc's flags are on by default. */
115 	if (doline + doword + dochar + domulti + dolongline == 0)
116 		doline = doword = dochar = 1;
117 
118 	errors = 0;
119 	total = 0;
120 	if (*argv == NULL) {
121 		if (cnt(NULL) != 0)
122 			++errors;
123 	} else {
124 		do {
125 			if (cnt(*argv) != 0)
126 				++errors;
127 			++total;
128 		} while(*++argv);
129 	}
130 
131 	if (total > 1)
132 		show_cnt("total", tlinect, twordct, tcharct, tlongline);
133 	exit(errors == 0 ? 0 : 1);
134 }
135 
136 static void
137 show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
138     uintmax_t charct, uintmax_t llct)
139 {
140 	FILE *out;
141 
142 	if (!siginfo)
143 		out = stdout;
144 	else {
145 		out = stderr;
146 		siginfo = 0;
147 	}
148 
149 	if (doline)
150 		fprintf(out, " %7ju", linect);
151 	if (doword)
152 		fprintf(out, " %7ju", wordct);
153 	if (dochar || domulti)
154 		fprintf(out, " %7ju", charct);
155 	if (dolongline)
156 		fprintf(out, " %7ju", llct);
157 	if (file != NULL)
158 		fprintf(out, " %s\n", file);
159 	else
160 		fprintf(out, "\n");
161 }
162 
163 static int
164 cnt(const char *file)
165 {
166 	struct stat sb;
167 	uintmax_t linect, wordct, charct, llct, tmpll;
168 	int fd, len, warned;
169 	size_t clen;
170 	short gotsp;
171 	u_char *p;
172 	u_char buf[MAXBSIZE];
173 	wchar_t wch;
174 	mbstate_t mbs;
175 
176 	linect = wordct = charct = llct = tmpll = 0;
177 	if (file == NULL) {
178 		fd = STDIN_FILENO;
179 	} else {
180 		if ((fd = open(file, O_RDONLY)) < 0) {
181 			warn("%s: open", file);
182 			return (1);
183 		}
184 		if (doword || (domulti && MB_CUR_MAX != 1))
185 			goto word;
186 		/*
187 		 * Line counting is split out because it's a lot faster to get
188 		 * lines than to get words, since the word count requires some
189 		 * logic.
190 		 */
191 		if (doline) {
192 			while ((len = read(fd, buf, MAXBSIZE))) {
193 				if (len == -1) {
194 					warn("%s: read", file);
195 					close(fd);
196 					return (1);
197 				}
198 				if (siginfo) {
199 					show_cnt(file, linect, wordct, charct,
200 					    llct);
201 				}
202 				charct += len;
203 				for (p = buf; len--; ++p) {
204 					if (*p == '\n') {
205 						if (tmpll > llct)
206 							llct = tmpll;
207 						tmpll = 0;
208 						++linect;
209 					} else
210 						tmpll++;
211 				}
212 			}
213 			reset_siginfo();
214 			tlinect += linect;
215 			if (dochar)
216 				tcharct += charct;
217 			if (dolongline) {
218 				if (llct > tlongline)
219 					tlongline = llct;
220 			}
221 			show_cnt(file, linect, wordct, charct, llct);
222 			close(fd);
223 			return (0);
224 		}
225 		/*
226 		 * If all we need is the number of characters and it's a
227 		 * regular file, just stat the puppy.
228 		 */
229 		if (dochar || domulti) {
230 			if (fstat(fd, &sb)) {
231 				warn("%s: fstat", file);
232 				close(fd);
233 				return (1);
234 			}
235 			/* Pseudo-filesystems advertize a zero size. */
236 			if (S_ISREG(sb.st_mode) && sb.st_size > 0) {
237 				reset_siginfo();
238 				charct = sb.st_size;
239 				show_cnt(file, linect, wordct, charct, llct);
240 				tcharct += charct;
241 				close(fd);
242 				return (0);
243 			}
244 		}
245 	}
246 
247 	/* Do it the hard way... */
248 word:	gotsp = 1;
249 	warned = 0;
250 	memset(&mbs, 0, sizeof(mbs));
251 	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
252 		if (len == -1) {
253 			warn("%s: read", file != NULL ? file : "stdin");
254 			close(fd);
255 			return (1);
256 		}
257 		p = buf;
258 		while (len > 0) {
259 			if (siginfo)
260 				show_cnt(file, linect, wordct, charct, llct);
261 			if (!domulti || MB_CUR_MAX == 1) {
262 				clen = 1;
263 				wch = (unsigned char)*p;
264 			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
265 			    (size_t)-1) {
266 				if (!warned) {
267 					errno = EILSEQ;
268 					warn("%s",
269 					    file != NULL ? file : "stdin");
270 					warned = 1;
271 				}
272 				memset(&mbs, 0, sizeof(mbs));
273 				clen = 1;
274 				wch = (unsigned char)*p;
275 			} else if (clen == (size_t)-2)
276 				break;
277 			else if (clen == 0)
278 				clen = 1;
279 			charct++;
280 			if (wch != L'\n')
281 				tmpll++;
282 			len -= clen;
283 			p += clen;
284 			if (wch == L'\n') {
285 				if (tmpll > llct)
286 					llct = tmpll;
287 				tmpll = 0;
288 				++linect;
289 			}
290 			if (iswspace(wch))
291 				gotsp = 1;
292 			else if (gotsp) {
293 				gotsp = 0;
294 				++wordct;
295 			}
296 		}
297 	}
298 	reset_siginfo();
299 	if (domulti && MB_CUR_MAX > 1)
300 		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
301 			warn("%s", file != NULL ? file : "stdin");
302 	if (doline)
303 		tlinect += linect;
304 	if (doword)
305 		twordct += wordct;
306 	if (dochar || domulti)
307 		tcharct += charct;
308 	if (dolongline) {
309 		if (llct > tlongline)
310 			tlongline = llct;
311 	}
312 	show_cnt(file, linect, wordct, charct, llct);
313 	close(fd);
314 	return (0);
315 }
316 
/* Write the synopsis to stderr and terminate with exit status 1. */
static void
usage(void)
{
	fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
323