1 /*
2 * wc - word count
3 *
4 * Gunnar Ritter, Freiburg i. Br., Germany, December 2000.
5 */
6 /*
7 * Copyright (c) 2003 Gunnar Ritter
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the authors be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must not
18 * claim that you wrote the original software. If you use this software
19 * in a product, an acknowledgment in the product documentation would be
20 * appreciated but is not required.
21 *
22 * 2. Altered source versions must be plainly marked as such, and must not be
23 * misrepresented as being the original software.
24 *
25 * 3. This notice may not be removed or altered from any source distribution.
26 */
27
/*
 * USED is attached to the identification string below.  With GCC 3.4 or
 * later (or any GCC 4+) it expands to the "used" attribute, with older
 * GCC versions to the "unused" attribute, and to nothing for other
 * compilers.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define USED __attribute__ ((used))
#elif defined __GNUC__
#define USED __attribute__ ((unused))
#else
#define USED
#endif
/*
 * Version identification string.  Nothing in this file reads it; which
 * of the three texts is compiled in depends on whether S42 or SUS is
 * defined at build time.
 */
#if defined (S42)
static const char sccsid[] USED = "@(#)wc_s42.sl 1.42 (gritter) 5/29/05";
#elif defined (SUS)
static const char sccsid[] USED = "@(#)wc_sus.sl 1.42 (gritter) 5/29/05";
#else
static const char sccsid[] USED = "@(#)wc.sl 1.42 (gritter) 5/29/05";
#endif
42
43 #include <sys/types.h>
44 #include <sys/stat.h>
45 #include <fcntl.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <wchar.h>
50 #include <locale.h>
51 #include <ctype.h>
52 #include <wctype.h>
53 #include <unistd.h>
54 #include <libgen.h>
55 #include <limits.h>
56
57 #include <iblok.h>
58 #include <blank.h>
59 #include <mbtowi.h>
60
/*
 * Program state shared by all functions in this file.
 *
 * The option flags are zero when the option was not given.  main() stores
 * the running option count in them, so a selected flag is non-zero; with
 * no options at all, cflag, lflag and wflag are set to 1.
 */
static unsigned errcnt; /* error indicator; becomes the exit status */
static int cflag; /* -c: report the byte count */
static int lflag; /* -l: report the line count */
static int mflag; /* -m: report the character count (clears cflag) */
static int wflag; /* -w: report the word count */
static int illflag; /* illegal flag given */
static long long bytec; /* byte count of the current input */
static long long charc; /* character count of the current input */
static long long wordc; /* word count of the current input */
static long long linec; /* line count of the current input */
static long long chart; /* total character count */
static long long bytet; /* total byte count */
static long long wordt; /* total word count */
static long long linet; /* total line count */
#if defined (S42)
/* non-zero once a count was printed on the current output line */
static int putspace; /* wrote space to output line */
#endif
static char *progname; /* basename of argv[0], used in messages */
static int mb_cur_max; /* MB_CUR_MAX, read once after setlocale() */

/* Prints the usage message and exits; defined further below. */
static void usage(void);
82
/*
 * Print a single count on standard output.
 *
 * Default and SUS builds: the number is right-aligned in a field of
 * seven columns and followed by one space.
 * S42 builds: the number is printed without padding; a single space
 * precedes every count except the first one on a line, which is tracked
 * through putspace.
 */
static void
report(unsigned long long count)
{
#if !defined (S42)
	printf("%7llu ", count);
#else	/* S42 */
	if (putspace++ != 0)
		printf(" ");
	printf("%llu", count);
#endif	/* S42 */
}
97
/*
 * COUNT(c) feeds one single-byte character into the word/line state
 * machine.  It expands to a bare if statement and is written without a
 * trailing semicolon at its call sites.  The expansion uses these names
 * from the calling scope: linec, wordc, hadspace and, in non-SUS builds,
 * the lookup tables blank[] and graph[].  The argument is evaluated more
 * than once, so it must not have side effects.
 *
 * hadspace is non-zero while no word is in progress.
 */
#ifdef SUS
/*
 * SUS: any isspace() character separates words; every other character
 * starts a new word if a separator was seen before it.
 */
#define COUNT(c) \
	if (!isspace(c)) { \
		if (hadspace) { \
			wordc++; \
			hadspace = 0; \
		} \
	} else { \
		hadspace++; \
		if ((c) == '\n') \
			linec++; \
	}
#else	/* !SUS */
/*
 * Non-SUS: only newline and blank characters separate words, and only a
 * graphic character can start one; all other characters change nothing.
 */
#define COUNT(c) \
	if ((c) == '\n' || blank[c]) { \
		hadspace++; \
		if ((c) == '\n') \
			linec++; \
	} else if (hadspace && graph[c]) { \
		wordc++; \
		hadspace = 0; \
	}
#endif	/* !SUS */
124
125 static int
sbwc(struct iblok * ip)126 sbwc(struct iblok *ip)
127 {
128 register long long hadspace = 1;
129 register int c, i;
130 size_t sz;
131 #ifndef SUS
132 /*
133 * If SUS is defined, this optimization brings no measurable
134 * performance gain.
135 */
136 int blank[256], graph[256];
137
138 for (c = 0; c < 256; c++) {
139 blank[c] = isblank(c);
140 graph[c] = isgraph(c);
141 }
142 #endif /* !SUS */
143
144 while (ib_read(ip) != EOF) {
145 ip->ib_cur--;
146 sz = ip->ib_end - ip->ib_cur;
147 charc += sz;
148 for (i = 0; i < sz; i++) {
149 c = ip->ib_cur[i] & 0377;
150 COUNT(c)
151 }
152 }
153 bytec = charc;
154 return 0;
155 }
156
/*
 * Multibyte-aware counter, selected by fpwc() when MB_CUR_MAX exceeds 1
 * and words or characters are to be counted.
 *
 * Updates bytec, charc, wordc and linec for the input behind ip.  Bytes
 * without the high bit are handled by COUNT(); bytes with the high bit
 * set are decoded with mbtowi() and classified with the wide-character
 * functions.  Always returns 0.
 */
static int
mbwc(struct iblok *ip)
{
	register long long hadspace = 1;	/* non-zero: not inside a word */
	char mb[MB_LEN_MAX];	/* holds a character split across blocks */
	wint_t c;
	int i, k, n;
	size_t sz;
	char *curp;		/* bytes being scanned: input block or mb[] */
	int eof = 0;
#ifndef SUS
	/*
	 * If SUS is defined, this optimization brings no measurable
	 * performance gain.
	 */
	int blank[128], graph[128];

	for (c = 0; c < 128; c++) {
		blank[c] = isblank(c);
		graph[c] = isgraph(c);
	}
#endif /* !SUS */

	while (eof == 0) {
		if (ip->ib_cur == ip->ib_end) {
			/* current block used up: fetch the next one */
			if (ib_read(ip) == EOF)
				break;
			/*
			 * Start one byte before ib_cur, as sbwc() does.
			 * NOTE(review): presumably ib_read() returns the
			 * first byte of the new block and steps past it;
			 * confirm against iblok.
			 */
			curp = ip->ib_cur - 1;
		} else
			/*
			 * Unread bytes remain in the block, which can happen
			 * after ib_get() calls in the previous pass.
			 */
			curp = ip->ib_cur;
		/* the whole remainder of the block is scanned via curp below */
		ip->ib_cur = ip->ib_end;
		sz = ip->ib_end - curp;
		bytec += sz;
		/*
		 * Incrementing charc here en bloc and decrementing it
		 * later for multibyte characters if necessary is
		 * considerably faster with ASCII files.
		 */
		charc += sz;
		for (i = 0; i < sz; i++) {
			if (curp[i] & 0200) {
				/* high bit set: not an ASCII byte */
				if (sz - i < mb_cur_max) {
					/*
					 * Fewer bytes are left than the longest
					 * character may need.  Move the tail into
					 * mb[] and top it up with ib_get(); each
					 * byte obtained this way is added to
					 * bytec and charc here.
					 */
					for (k = 0; k < sz - i; k++)
						mb[k] = curp[i+k];
					while (eof == 0 && k < mb_cur_max) {
						if ((c = ib_get(ip)) != EOF) {
							mb[k++] = c;
							bytec++;
							charc++;
						} else
							eof = 1;
					}
					/* continue the scan inside mb[] */
					curp = mb;
					sz = k;
					i = 0;
				}
				n = mbtowi(&c, &curp[i], sz - i);
				if (n < 0) {
					/*
					 * Conversion failed: this byte is not
					 * counted as a character and the scan
					 * moves on by one byte.
					 */
					charc--;
					continue;
				}
				/* n bytes were pre-counted as n characters */
				charc -= n - 1;
				/* skip the remaining bytes of this character */
				i += n - 1;
#ifdef SUS
				if (iswspace(c)) {
					if (c == '\n')
						linec++;
					hadspace++;
				} else {
					if (hadspace) {
						hadspace = 0;
						wordc++;
					}
				}
#else /* !SUS */
				if (c == '\n') {
					linec++;
					hadspace++;
				} else if (iswblank(c)) {
					hadspace++;
				} else {
					if (hadspace && iswgraph(c)) {
						hadspace = 0;
						wordc++;
					}
				}
#endif /* !SUS */
			} else /* isascii(c) */ {
				c = curp[i];
				COUNT(c)
			}
		}
	}
	return 0;
}
252
253 /*
254 * Count a file.
255 */
256 static int
fpwc(int fd)257 fpwc(int fd)
258 {
259 struct iblok *ip;
260
261 bytec = charc = wordc = linec = 0;
262 ip = ib_alloc(fd, 0);
263 if ((mb_cur_max > 1 && (wflag || mflag) ? mbwc : sbwc)(ip)) {
264 ib_free(ip);
265 return -1;
266 }
267 ib_free(ip);
268 if (illflag)
269 usage();
270 #if defined (S42)
271 putspace = 0;
272 #endif
273 if (lflag)
274 report(linec);
275 if (wflag)
276 report(wordc);
277 if (cflag)
278 report(bytec);
279 if (mflag)
280 report(charc);
281 linet += linec;
282 wordt += wordc;
283 chart += charc;
284 bytet += bytec;
285 return 0;
286 }
287
288 /*
289 * Count a file given by its name.
290 */
291 static int
filewc(const char * fn)292 filewc(const char *fn)
293 {
294 int fd;
295 int r;
296
297 if ((fd = open(fn, O_RDONLY)) < 0) {
298 fprintf(stderr, "%s: cannot open %s\n", progname, fn);
299 errcnt |= 1;
300 return 0;
301 }
302 if ((r = fpwc(fd)) >= 0)
303 printf(" %s\n", fn);
304 if (fd > 0)
305 close(fd);
306 return r >= 0 ? 1 : 0;
307 }
308
309 static void
usage(void)310 usage(void)
311 {
312 fprintf(stderr, "usage: %s [-clw] [name ...]\n", progname);
313 exit(2);
314 }
315
316 int
main(int argc,char ** argv)317 main(int argc, char **argv)
318 {
319 int c;
320 unsigned ac;
321
322 #ifdef __GLIBC__
323 putenv("POSIXLY_CORRECT=1");
324 #endif
325 progname = basename(argv[0]);
326 setlocale(LC_CTYPE, "");
327 mb_cur_max = MB_CUR_MAX;
328 ac = 0;
329 while ((c = getopt(argc, argv, ":clmw")) != EOF) {
330 ac++;
331 switch (c) {
332 case 'c':
333 cflag = ac;
334 break;
335 case 'l':
336 lflag = ac;
337 break;
338 case 'm':
339 mflag = ac;
340 break;
341 case 'w':
342 wflag = ac;
343 break;
344 default:
345 illflag = 1;
346 }
347 }
348 if (ac == 0) {
349 #if !defined (SUS) && !defined(S42)
350 if (argv[1] && argv[1][0] == '-' && argv[1][1] == '\0')
351 optind++;
352 else
353 #endif /* !SUS, !S42 */
354 cflag = lflag = wflag = 1;
355 }
356 if (mflag)
357 cflag = 0;
358 if (optind < argc) {
359 ac = 0;
360 while (optind < argc)
361 ac += filewc(argv[optind++]);
362 if (ac > 1) {
363 #if defined (S42)
364 putspace = 0;
365 #endif
366 if (lflag)
367 report(linet);
368 if (wflag)
369 report(wordt);
370 if (cflag)
371 report(bytet);
372 if (mflag)
373 report(chart);
374 printf(" total\n");
375 }
376 } else {
377 fpwc(0);
378 printf("\n");
379 }
380 return errcnt;
381 }
382