1 /* $OpenBSD: wc.c,v 1.32 2024/09/11 03:57:14 guenther Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/stat.h>
33
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <locale.h>
38 #include <ctype.h>
39 #include <err.h>
40 #include <unistd.h>
41 #include <util.h>
42 #include <wchar.h>
43 #include <wctype.h>
44
/*
 * Size in bytes of the buffer that cnt() allocates and passes to read(2)
 * on the single-byte counting paths.
 */
#define _MAXBSIZE (64 * 1024)

/* Running totals over all inputs; main() prints them on the "total" line. */
int64_t tlinect, twordct, tcharct;
/*
 * Option flags set by main() while parsing the command line:
 * doline (-l), doword (-w), dochar (-c or -m), humanchar (-h), and
 * multibyte (-m, only when the locale's MB_CUR_MAX is greater than 1).
 */
int doline, doword, dochar, humanchar, multibyte;
/* Value returned by main(); cnt() sets it to 1 whenever it reports an error. */
int rval;
/* Program name, used by main() in the usage message. */
extern char *__progname;

/* Helpers defined later in this file. */
static void print_counts(int64_t, int64_t, int64_t, const char *);
static void format_and_print(int64_t);
static void cnt(const char *);
55
56 int
main(int argc,char * argv[])57 main(int argc, char *argv[])
58 {
59 int ch;
60
61 setlocale(LC_CTYPE, "");
62
63 if (pledge("stdio rpath", NULL) == -1)
64 err(1, "pledge");
65
66 while ((ch = getopt(argc, argv, "lwchm")) != -1)
67 switch(ch) {
68 case 'l':
69 doline = 1;
70 break;
71 case 'w':
72 doword = 1;
73 break;
74 case 'm':
75 if (MB_CUR_MAX > 1)
76 multibyte = 1;
77 /* FALLTHROUGH */
78 case 'c':
79 dochar = 1;
80 break;
81 case 'h':
82 humanchar = 1;
83 break;
84 default:
85 fprintf(stderr,
86 "usage: %s [-c | -m] [-hlw] [file ...]\n",
87 __progname);
88 return 1;
89 }
90 argv += optind;
91 argc -= optind;
92
93 /*
94 * wc is unusual in that its flags are on by default, so,
95 * if you don't get any arguments, you have to turn them
96 * all on.
97 */
98 if (!doline && !doword && !dochar)
99 doline = doword = dochar = 1;
100
101 if (!*argv) {
102 cnt(NULL);
103 } else {
104 int dototal = (argc > 1);
105
106 do {
107 cnt(*argv);
108 } while(*++argv);
109
110 if (dototal)
111 print_counts(tlinect, twordct, tcharct, "total");
112 }
113
114 return rval;
115 }
116
117 static void
cnt(const char * path)118 cnt(const char *path)
119 {
120 static char *buf;
121 static size_t bufsz;
122
123 FILE *stream;
124 const char *file;
125 char *C;
126 wchar_t wc;
127 short gotsp;
128 ssize_t len;
129 int64_t linect, wordct, charct;
130 struct stat sbuf;
131 int fd;
132
133 linect = wordct = charct = 0;
134 stream = NULL;
135 if (path != NULL) {
136 file = path;
137 if ((fd = open(file, O_RDONLY)) == -1) {
138 warn("%s", file);
139 rval = 1;
140 return;
141 }
142 } else {
143 file = "(stdin)";
144 fd = STDIN_FILENO;
145 }
146
147 if (!multibyte) {
148 if (bufsz < _MAXBSIZE &&
149 (buf = realloc(buf, _MAXBSIZE)) == NULL)
150 err(1, NULL);
151
152 /*
153 * According to POSIX, a word is a "maximal string of
154 * characters delimited by whitespace." Nothing is said
155 * about a character being printing or non-printing.
156 */
157 if (doword) {
158 gotsp = 1;
159 while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
160 charct += len;
161 for (C = buf; len--; ++C) {
162 if (isspace((unsigned char)*C)) {
163 gotsp = 1;
164 if (*C == '\n')
165 ++linect;
166 } else if (gotsp) {
167 gotsp = 0;
168 ++wordct;
169 }
170 }
171 }
172 if (len == -1) {
173 warn("%s", file);
174 rval = 1;
175 }
176 }
177 /*
178 * Line counting is split out because it's a lot
179 * faster to get lines than to get words, since
180 * the word count requires some logic.
181 */
182 else if (doline) {
183 while ((len = read(fd, buf, _MAXBSIZE)) > 0) {
184 charct += len;
185 for (C = buf; len--; ++C)
186 if (*C == '\n')
187 ++linect;
188 }
189 if (len == -1) {
190 warn("%s", file);
191 rval = 1;
192 }
193 }
194 /*
195 * If all we need is the number of characters and
196 * it's a directory or a regular file, just stat
197 * our handle. We avoid testing for it not being
198 * a special device in case someone adds a new type
199 * of inode.
200 */
201 else if (dochar) {
202 if (fstat(fd, &sbuf)) {
203 warn("%s", file);
204 rval = 1;
205 } else {
206 if (S_ISREG(sbuf.st_mode) || S_ISDIR(sbuf.st_mode))
207 charct = sbuf.st_size;
208 else {
209 while ((len = read(fd, buf, _MAXBSIZE)) > 0)
210 charct += len;
211 if (len == -1) {
212 warn("%s", file);
213 rval = 1;
214 }
215 }
216 }
217 }
218 } else {
219 if (path == NULL)
220 stream = stdin;
221 else if ((stream = fdopen(fd, "r")) == NULL) {
222 warn("%s", file);
223 close(fd);
224 rval = 1;
225 return;
226 }
227
228 gotsp = 1;
229 while ((len = getline(&buf, &bufsz, stream)) > 0) {
230 const char *end = buf + len;
231 for (C = buf; C < end; C += len) {
232 ++charct;
233 len = mbtowc(&wc, C, MB_CUR_MAX);
234 if (len == -1) {
235 mbtowc(NULL, NULL,
236 MB_CUR_MAX);
237 len = 1;
238 wc = L'?';
239 } else if (len == 0)
240 len = 1;
241 if (iswspace(wc)) {
242 gotsp = 1;
243 if (wc == L'\n')
244 ++linect;
245 } else if (gotsp) {
246 gotsp = 0;
247 ++wordct;
248 }
249 }
250 }
251 if (ferror(stream)) {
252 warn("%s", file);
253 rval = 1;
254 }
255 }
256
257 print_counts(linect, wordct, charct, path);
258
259 /*
260 * Don't bother checking doline, doword, or dochar -- speeds
261 * up the common case
262 */
263 tlinect += linect;
264 twordct += wordct;
265 tcharct += charct;
266
267 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
268 warn("%s", file);
269 rval = 1;
270 }
271 }
272
273 static void
format_and_print(int64_t v)274 format_and_print(int64_t v)
275 {
276 if (humanchar) {
277 char result[FMT_SCALED_STRSIZE];
278
279 fmt_scaled((long long)v, result);
280 printf("%7s", result);
281 } else {
282 printf(" %7lld", v);
283 }
284 }
285
286 static void
print_counts(int64_t lines,int64_t words,int64_t chars,const char * name)287 print_counts(int64_t lines, int64_t words, int64_t chars, const char *name)
288 {
289 if (doline)
290 format_and_print(lines);
291 if (doword)
292 format_and_print(words);
293 if (dochar)
294 format_and_print(chars);
295
296 if (name)
297 printf(" %s\n", name);
298 else
299 printf("\n");
300 }
301