xref: /openbsd/usr.bin/wc/wc.c (revision 4bdff4be)
1 /*	$OpenBSD: wc.c,v 1.31 2022/12/04 23:50:50 cheloha Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/stat.h>
33 
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <locale.h>
38 #include <ctype.h>
39 #include <err.h>
40 #include <unistd.h>
41 #include <util.h>
42 #include <wchar.h>
43 #include <wctype.h>
44 
/* Size in bytes of the buffer handed to read(2) in byte-oriented mode. */
#define	_MAXBSIZE (64 * 1024)

/* Running totals over every input, printed on the "total" line. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Option flags, set while main() parses the command line. */
int	doline;		/* -l: print the line count */
int	doword;		/* -w: print the word count */
int	dochar;		/* -c or -m: print the byte/character count */
int	humanchar;	/* -h: print counts through fmt_scaled() */
int	multibyte;	/* -m given and MB_CUR_MAX > 1 */

/* Exit status returned by main(); set to 1 when an input fails. */
int	rval;

/* Program name used in the usage message. */
extern char *__progname;

static void print_counts(int64_t lines, int64_t words, int64_t chars,
    const char *name);
static void format_and_print(int64_t v);
static void cnt(const char *path);
55 
56 int
57 main(int argc, char *argv[])
58 {
59 	int ch;
60 
61 	setlocale(LC_CTYPE, "");
62 
63 	if (pledge("stdio rpath", NULL) == -1)
64 		err(1, "pledge");
65 
66 	while ((ch = getopt(argc, argv, "lwchm")) != -1)
67 		switch(ch) {
68 		case 'l':
69 			doline = 1;
70 			break;
71 		case 'w':
72 			doword = 1;
73 			break;
74 		case 'm':
75 			if (MB_CUR_MAX > 1)
76 				multibyte = 1;
77 			/* FALLTHROUGH */
78 		case 'c':
79 			dochar = 1;
80 			break;
81 		case 'h':
82 			humanchar = 1;
83 			break;
84 		default:
85 			fprintf(stderr,
86 			    "usage: %s [-c | -m] [-hlw] [file ...]\n",
87 			    __progname);
88 			return 1;
89 		}
90 	argv += optind;
91 	argc -= optind;
92 
93 	/*
94 	 * wc is unusual in that its flags are on by default, so,
95 	 * if you don't get any arguments, you have to turn them
96 	 * all on.
97 	 */
98 	if (!doline && !doword && !dochar)
99 		doline = doword = dochar = 1;
100 
101 	if (!*argv) {
102 		cnt(NULL);
103 	} else {
104 		int dototal = (argc > 1);
105 
106 		do {
107 			cnt(*argv);
108 		} while(*++argv);
109 
110 		if (dototal)
111 			print_counts(tlinect, twordct, tcharct, "total");
112 	}
113 
114 	return rval;
115 }
116 
117 static void
118 cnt(const char *path)
119 {
120 	static char *buf;
121 	static size_t bufsz;
122 
123 	FILE *stream;
124 	const char *file;
125 	char *C;
126 	wchar_t wc;
127 	short gotsp;
128 	ssize_t len;
129 	int64_t linect, wordct, charct;
130 	struct stat sbuf;
131 	int fd;
132 
133 	linect = wordct = charct = 0;
134 	stream = NULL;
135 	if (path != NULL) {
136 		file = path;
137 		if ((fd = open(file, O_RDONLY)) == -1) {
138 			warn("%s", file);
139 			rval = 1;
140 			return;
141 		}
142 	} else  {
143 		file = "(stdin)";
144 		fd = STDIN_FILENO;
145 	}
146 
147 	if (!multibyte) {
148 		if (bufsz < _MAXBSIZE &&
149 		    (buf = realloc(buf, _MAXBSIZE)) == NULL)
150 			err(1, NULL);
151 
152 		/*
153 		 * According to POSIX, a word is a "maximal string of
154 		 * characters delimited by whitespace."  Nothing is said
155 		 * about a character being printing or non-printing.
156 		 */
157 		if (doword) {
158 			gotsp = 1;
159 			while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
160 				charct += len;
161 				for (C = buf; len--; ++C) {
162 					if (isspace((unsigned char)*C)) {
163 						gotsp = 1;
164 						if (*C == '\n')
165 							++linect;
166 					} else if (gotsp) {
167 						gotsp = 0;
168 						++wordct;
169 					}
170 				}
171 			}
172 			if (len == -1) {
173 				warn("%s", file);
174 				rval = 1;
175 			}
176 		}
177 		/*
178 		 * Line counting is split out because it's a lot
179 		 * faster to get lines than to get words, since
180 		 * the word count requires some logic.
181 		 */
182 		else if (doline) {
183 			while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
184 				charct += len;
185 				for (C = buf; len--; ++C)
186 					if (*C == '\n')
187 						++linect;
188 			}
189 			if (len == -1) {
190 				warn("%s", file);
191 				rval = 1;
192 			}
193 		}
194 		/*
195 		 * If all we need is the number of characters and
196 		 * it's a directory or a regular or linked file, just
197 		 * stat the puppy.  We avoid testing for it not being
198 		 * a special device in case someone adds a new type
199 		 * of inode.
200 		 */
201 		else if (dochar) {
202 			mode_t ifmt;
203 
204 			if (fstat(fd, &sbuf)) {
205 				warn("%s", file);
206 				rval = 1;
207 			} else {
208 				ifmt = sbuf.st_mode & S_IFMT;
209 				if (ifmt == S_IFREG || ifmt == S_IFLNK
210 				    || ifmt == S_IFDIR) {
211 					charct = sbuf.st_size;
212 				} else {
213 					while ((len = read(fd, buf, _MAXBSIZE)) > 0)
214 						charct += len;
215 					if (len == -1) {
216 						warn("%s", file);
217 						rval = 1;
218 					}
219 				}
220 			}
221 		}
222 	} else {
223 		if (path == NULL)
224 			stream = stdin;
225 		else if ((stream = fdopen(fd, "r")) == NULL) {
226 			warn("%s", file);
227 			close(fd);
228 			rval = 1;
229 			return;
230 		}
231 
232 		gotsp = 1;
233 		while ((len = getline(&buf, &bufsz, stream)) > 0) {
234 			const char *end = buf + len;
235 			for (C = buf; C < end; C += len) {
236 				++charct;
237 				len = mbtowc(&wc, C, MB_CUR_MAX);
238 				if (len == -1) {
239 					mbtowc(NULL, NULL,
240 					    MB_CUR_MAX);
241 					len = 1;
242 					wc = L'?';
243 				} else if (len == 0)
244 					len = 1;
245 				if (iswspace(wc)) {
246 					gotsp = 1;
247 					if (wc == L'\n')
248 						++linect;
249 				} else if (gotsp) {
250 					gotsp = 0;
251 					++wordct;
252 				}
253 			}
254 		}
255 		if (ferror(stream)) {
256 			warn("%s", file);
257 			rval = 1;
258 		}
259 	}
260 
261 	print_counts(linect, wordct, charct, path);
262 
263 	/*
264 	 * Don't bother checking doline, doword, or dochar -- speeds
265 	 * up the common case
266 	 */
267 	tlinect += linect;
268 	twordct += wordct;
269 	tcharct += charct;
270 
271 	if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
272 		warn("%s", file);
273 		rval = 1;
274 	}
275 }
276 
277 static void
278 format_and_print(int64_t v)
279 {
280 	if (humanchar) {
281 		char result[FMT_SCALED_STRSIZE];
282 
283 		fmt_scaled((long long)v, result);
284 		printf("%7s", result);
285 	} else {
286 		printf(" %7lld", v);
287 	}
288 }
289 
290 static void
291 print_counts(int64_t lines, int64_t words, int64_t chars, const char *name)
292 {
293 	if (doline)
294 		format_and_print(lines);
295 	if (doword)
296 		format_and_print(words);
297 	if (dochar)
298 		format_and_print(chars);
299 
300 	if (name)
301 		printf(" %s\n", name);
302 	else
303 		printf("\n");
304 }
305