1 /* $OpenBSD: wc.c,v 1.31 2022/12/04 23:50:50 cheloha Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/stat.h> 33 34 #include <fcntl.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <locale.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <unistd.h> 41 #include <util.h> 42 #include <wchar.h> 43 #include <wctype.h> 44 45 #define _MAXBSIZE (64 * 1024) 46 47 int64_t tlinect, twordct, tcharct; 48 int doline, doword, dochar, humanchar, multibyte; 49 int rval; 50 extern char *__progname; 51 52 static void print_counts(int64_t, int64_t, int64_t, const char *); 53 static void format_and_print(int64_t); 54 static void cnt(const char *); 55 56 int 57 main(int argc, char *argv[]) 58 { 59 int ch; 60 61 setlocale(LC_CTYPE, ""); 62 63 if (pledge("stdio rpath", NULL) == -1) 64 err(1, "pledge"); 65 66 while ((ch = getopt(argc, argv, "lwchm")) != -1) 67 switch(ch) { 68 case 'l': 69 doline = 1; 70 break; 71 case 'w': 72 doword = 1; 73 break; 74 case 'm': 75 if (MB_CUR_MAX > 1) 76 multibyte = 1; 77 /* FALLTHROUGH */ 78 case 'c': 79 dochar = 1; 80 break; 81 case 'h': 82 humanchar = 1; 83 break; 84 default: 85 fprintf(stderr, 86 "usage: %s [-c | -m] [-hlw] [file ...]\n", 87 __progname); 88 return 1; 89 } 90 argv += optind; 91 argc -= optind; 92 93 /* 94 * wc is unusual in that its flags are on by default, so, 95 * if you don't get any arguments, you have to turn them 96 * all on. 97 */ 98 if (!doline && !doword && !dochar) 99 doline = doword = dochar = 1; 100 101 if (!*argv) { 102 cnt(NULL); 103 } else { 104 int dototal = (argc > 1); 105 106 do { 107 cnt(*argv); 108 } while(*++argv); 109 110 if (dototal) 111 print_counts(tlinect, twordct, tcharct, "total"); 112 } 113 114 return rval; 115 } 116 117 static void 118 cnt(const char *path) 119 { 120 static char *buf; 121 static size_t bufsz; 122 123 FILE *stream; 124 const char *file; 125 char *C; 126 wchar_t wc; 127 short gotsp; 128 ssize_t len; 129 int64_t linect, wordct, charct; 130 struct stat sbuf; 131 int fd; 132 133 linect = wordct = charct = 0; 134 stream = NULL; 135 if (path != NULL) { 136 file = path; 137 if ((fd = open(file, O_RDONLY)) == -1) { 138 warn("%s", file); 139 rval = 1; 140 return; 141 } 142 } else { 143 file = "(stdin)"; 144 fd = STDIN_FILENO; 145 } 146 147 if (!multibyte) { 148 if (bufsz < _MAXBSIZE && 149 (buf = realloc(buf, _MAXBSIZE)) == NULL) 150 err(1, NULL); 151 152 /* 153 * According to POSIX, a word is a "maximal string of 154 * characters delimited by whitespace." Nothing is said 155 * about a character being printing or non-printing. 156 */ 157 if (doword) { 158 gotsp = 1; 159 while ((len = read(fd, buf, _MAXBSIZE)) > 0) { 160 charct += len; 161 for (C = buf; len--; ++C) { 162 if (isspace((unsigned char)*C)) { 163 gotsp = 1; 164 if (*C == '\n') 165 ++linect; 166 } else if (gotsp) { 167 gotsp = 0; 168 ++wordct; 169 } 170 } 171 } 172 if (len == -1) { 173 warn("%s", file); 174 rval = 1; 175 } 176 } 177 /* 178 * Line counting is split out because it's a lot 179 * faster to get lines than to get words, since 180 * the word count requires some logic. 181 */ 182 else if (doline) { 183 while ((len = read(fd, buf, _MAXBSIZE)) > 0) { 184 charct += len; 185 for (C = buf; len--; ++C) 186 if (*C == '\n') 187 ++linect; 188 } 189 if (len == -1) { 190 warn("%s", file); 191 rval = 1; 192 } 193 } 194 /* 195 * If all we need is the number of characters and 196 * it's a directory or a regular or linked file, just 197 * stat the puppy. We avoid testing for it not being 198 * a special device in case someone adds a new type 199 * of inode. 200 */ 201 else if (dochar) { 202 mode_t ifmt; 203 204 if (fstat(fd, &sbuf)) { 205 warn("%s", file); 206 rval = 1; 207 } else { 208 ifmt = sbuf.st_mode & S_IFMT; 209 if (ifmt == S_IFREG || ifmt == S_IFLNK 210 || ifmt == S_IFDIR) { 211 charct = sbuf.st_size; 212 } else { 213 while ((len = read(fd, buf, _MAXBSIZE)) > 0) 214 charct += len; 215 if (len == -1) { 216 warn("%s", file); 217 rval = 1; 218 } 219 } 220 } 221 } 222 } else { 223 if (path == NULL) 224 stream = stdin; 225 else if ((stream = fdopen(fd, "r")) == NULL) { 226 warn("%s", file); 227 close(fd); 228 rval = 1; 229 return; 230 } 231 232 gotsp = 1; 233 while ((len = getline(&buf, &bufsz, stream)) > 0) { 234 const char *end = buf + len; 235 for (C = buf; C < end; C += len) { 236 ++charct; 237 len = mbtowc(&wc, C, MB_CUR_MAX); 238 if (len == -1) { 239 mbtowc(NULL, NULL, 240 MB_CUR_MAX); 241 len = 1; 242 wc = L'?'; 243 } else if (len == 0) 244 len = 1; 245 if (iswspace(wc)) { 246 gotsp = 1; 247 if (wc == L'\n') 248 ++linect; 249 } else if (gotsp) { 250 gotsp = 0; 251 ++wordct; 252 } 253 } 254 } 255 if (ferror(stream)) { 256 warn("%s", file); 257 rval = 1; 258 } 259 } 260 261 print_counts(linect, wordct, charct, path); 262 263 /* 264 * Don't bother checking doline, doword, or dochar -- speeds 265 * up the common case 266 */ 267 tlinect += linect; 268 twordct += wordct; 269 tcharct += charct; 270 271 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { 272 warn("%s", file); 273 rval = 1; 274 } 275 } 276 277 static void 278 format_and_print(int64_t v) 279 { 280 if (humanchar) { 281 char result[FMT_SCALED_STRSIZE]; 282 283 fmt_scaled((long long)v, result); 284 printf("%7s", result); 285 } else { 286 printf(" %7lld", v); 287 } 288 } 289 290 static void 291 print_counts(int64_t lines, int64_t words, int64_t chars, const char *name) 292 { 293 if (doline) 294 format_and_print(lines); 295 if (doword) 296 format_and_print(words); 297 if (dochar) 298 format_and_print(chars); 299 300 if (name) 301 printf(" %s\n", name); 302 else 303 printf("\n"); 304 } 305