xref: /openbsd/usr.bin/wc/wc.c (revision 771fbea0)
1 /*	$OpenBSD: wc.c,v 1.26 2019/06/28 13:35:05 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>	/* MAXBSIZE */
33 #include <sys/stat.h>
34 
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <locale.h>
39 #include <ctype.h>
40 #include <err.h>
41 #include <unistd.h>
42 #include <util.h>
43 #include <wchar.h>
44 #include <wctype.h>
45 
/* Running totals over every input processed; printed on the "total" line. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Column selectors: set by -l, -w and -c/-m respectively. */
int	doline;
int	doword;
int	dochar;
/* Set by -h: print counts through fmt_scaled() instead of as raw numbers. */
int	humanchar;
/* Set by -m when the locale's MB_CUR_MAX is greater than 1. */
int	multibyte;

/* Exit status returned by main(); set to 1 when an input fails. */
int	rval;

extern char *__progname;

static void print_counts(int64_t, int64_t, int64_t, char *);
static void format_and_print(int64_t);
static void cnt(char *);
54 
55 int
56 main(int argc, char *argv[])
57 {
58 	int ch;
59 
60 	setlocale(LC_CTYPE, "");
61 
62 	if (pledge("stdio rpath", NULL) == -1)
63 		err(1, "pledge");
64 
65 	while ((ch = getopt(argc, argv, "lwchm")) != -1)
66 		switch(ch) {
67 		case 'l':
68 			doline = 1;
69 			break;
70 		case 'w':
71 			doword = 1;
72 			break;
73 		case 'm':
74 			if (MB_CUR_MAX > 1)
75 				multibyte = 1;
76 			/* FALLTHROUGH */
77 		case 'c':
78 			dochar = 1;
79 			break;
80 		case 'h':
81 			humanchar = 1;
82 			break;
83 		case '?':
84 		default:
85 			fprintf(stderr,
86 			    "usage: %s [-c | -m] [-hlw] [file ...]\n",
87 			    __progname);
88 			return 1;
89 		}
90 	argv += optind;
91 	argc -= optind;
92 
93 	/*
94 	 * wc is unusual in that its flags are on by default, so,
95 	 * if you don't get any arguments, you have to turn them
96 	 * all on.
97 	 */
98 	if (!doline && !doword && !dochar)
99 		doline = doword = dochar = 1;
100 
101 	if (!*argv) {
102 		cnt(NULL);
103 	} else {
104 		int dototal = (argc > 1);
105 
106 		do {
107 			cnt(*argv);
108 		} while(*++argv);
109 
110 		if (dototal)
111 			print_counts(tlinect, twordct, tcharct, "total");
112 	}
113 
114 	return rval;
115 }
116 
117 static void
118 cnt(char *file)
119 {
120 	static char *buf;
121 	static size_t bufsz;
122 
123 	FILE *stream;
124 	char *C;
125 	wchar_t wc;
126 	short gotsp;
127 	ssize_t len;
128 	int64_t linect, wordct, charct;
129 	struct stat sbuf;
130 	int fd;
131 
132 	linect = wordct = charct = 0;
133 	stream = NULL;
134 	if (file) {
135 		if ((fd = open(file, O_RDONLY, 0)) == -1) {
136 			warn("%s", file);
137 			rval = 1;
138 			return;
139 		}
140 	} else  {
141 		fd = STDIN_FILENO;
142 	}
143 
144 	if (!doword && !multibyte) {
145 		if (bufsz < MAXBSIZE &&
146 		    (buf = realloc(buf, MAXBSIZE)) == NULL)
147 			err(1, NULL);
148 		/*
149 		 * Line counting is split out because it's a lot
150 		 * faster to get lines than to get words, since
151 		 * the word count requires some logic.
152 		 */
153 		if (doline) {
154 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
155 				charct += len;
156 				for (C = buf; len--; ++C)
157 					if (*C == '\n')
158 						++linect;
159 			}
160 			if (len == -1) {
161 				warn("%s", file);
162 				rval = 1;
163 			}
164 		}
165 		/*
166 		 * If all we need is the number of characters and
167 		 * it's a directory or a regular or linked file, just
168 		 * stat the puppy.  We avoid testing for it not being
169 		 * a special device in case someone adds a new type
170 		 * of inode.
171 		 */
172 		else if (dochar) {
173 			mode_t ifmt;
174 
175 			if (fstat(fd, &sbuf)) {
176 				warn("%s", file);
177 				rval = 1;
178 			} else {
179 				ifmt = sbuf.st_mode & S_IFMT;
180 				if (ifmt == S_IFREG || ifmt == S_IFLNK
181 				    || ifmt == S_IFDIR) {
182 					charct = sbuf.st_size;
183 				} else {
184 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
185 						charct += len;
186 					if (len == -1) {
187 						warn("%s", file);
188 						rval = 1;
189 					}
190 				}
191 			}
192 		}
193 	} else {
194 		if (file == NULL)
195 			stream = stdin;
196 		else if ((stream = fdopen(fd, "r")) == NULL) {
197 			warn("%s", file);
198 			close(fd);
199 			rval = 1;
200 			return;
201 		}
202 
203 		/*
204 		 * Do it the hard way.
205 		 * According to POSIX, a word is a "maximal string of
206 		 * characters delimited by whitespace."  Nothing is said
207 		 * about a character being printing or non-printing.
208 		 */
209 		gotsp = 1;
210 		while ((len = getline(&buf, &bufsz, stream)) > 0) {
211 			if (multibyte) {
212 				const char *end = buf + len;
213 				for (C = buf; C < end; C += len) {
214 					++charct;
215 					len = mbtowc(&wc, C, MB_CUR_MAX);
216 					if (len == -1) {
217 						mbtowc(NULL, NULL,
218 						    MB_CUR_MAX);
219 						len = 1;
220 						wc = L'?';
221 					} else if (len == 0)
222 						len = 1;
223 					if (iswspace(wc)) {
224 						gotsp = 1;
225 						if (wc == L'\n')
226 							++linect;
227 					} else if (gotsp) {
228 						gotsp = 0;
229 						++wordct;
230 					}
231 				}
232 			} else {
233 				charct += len;
234 				for (C = buf; len--; ++C) {
235 					if (isspace((unsigned char)*C)) {
236 						gotsp = 1;
237 						if (*C == '\n')
238 							++linect;
239 					} else if (gotsp) {
240 						gotsp = 0;
241 						++wordct;
242 					}
243 				}
244 			}
245 		}
246 		if (ferror(stream)) {
247 			warn("%s", file);
248 			rval = 1;
249 		}
250 	}
251 
252 	print_counts(linect, wordct, charct, file);
253 
254 	/*
255 	 * Don't bother checking doline, doword, or dochar -- speeds
256 	 * up the common case
257 	 */
258 	tlinect += linect;
259 	twordct += wordct;
260 	tcharct += charct;
261 
262 	if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
263 		warn("%s", file);
264 		rval = 1;
265 	}
266 }
267 
268 static void
269 format_and_print(int64_t v)
270 {
271 	if (humanchar) {
272 		char result[FMT_SCALED_STRSIZE];
273 
274 		fmt_scaled((long long)v, result);
275 		printf("%7s", result);
276 	} else {
277 		printf(" %7lld", v);
278 	}
279 }
280 
281 static void
282 print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
283 {
284 	if (doline)
285 		format_and_print(lines);
286 	if (doword)
287 		format_and_print(words);
288 	if (dochar)
289 		format_and_print(chars);
290 
291 	if (name)
292 		printf(" %s\n", name);
293 	else
294 		printf("\n");
295 }
296