1 /* $OpenBSD: wc.c,v 1.26 2019/06/28 13:35:05 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> /* MAXBSIZE */ 33 #include <sys/stat.h> 34 35 #include <fcntl.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <locale.h> 39 #include <ctype.h> 40 #include <err.h> 41 #include <unistd.h> 42 #include <util.h> 43 #include <wchar.h> 44 #include <wctype.h> 45 46 int64_t tlinect, twordct, tcharct; 47 int doline, doword, dochar, humanchar, multibyte; 48 int rval; 49 extern char *__progname; 50 51 static void print_counts(int64_t, int64_t, int64_t, char *); 52 static void format_and_print(int64_t); 53 static void cnt(char *); 54 55 int 56 main(int argc, char *argv[]) 57 { 58 int ch; 59 60 setlocale(LC_CTYPE, ""); 61 62 if (pledge("stdio rpath", NULL) == -1) 63 err(1, "pledge"); 64 65 while ((ch = getopt(argc, argv, "lwchm")) != -1) 66 switch(ch) { 67 case 'l': 68 doline = 1; 69 break; 70 case 'w': 71 doword = 1; 72 break; 73 case 'm': 74 if (MB_CUR_MAX > 1) 75 multibyte = 1; 76 /* FALLTHROUGH */ 77 case 'c': 78 dochar = 1; 79 break; 80 case 'h': 81 humanchar = 1; 82 break; 83 case '?': 84 default: 85 fprintf(stderr, 86 "usage: %s [-c | -m] [-hlw] [file ...]\n", 87 __progname); 88 return 1; 89 } 90 argv += optind; 91 argc -= optind; 92 93 /* 94 * wc is unusual in that its flags are on by default, so, 95 * if you don't get any arguments, you have to turn them 96 * all on. 97 */ 98 if (!doline && !doword && !dochar) 99 doline = doword = dochar = 1; 100 101 if (!*argv) { 102 cnt(NULL); 103 } else { 104 int dototal = (argc > 1); 105 106 do { 107 cnt(*argv); 108 } while(*++argv); 109 110 if (dototal) 111 print_counts(tlinect, twordct, tcharct, "total"); 112 } 113 114 return rval; 115 } 116 117 static void 118 cnt(char *file) 119 { 120 static char *buf; 121 static size_t bufsz; 122 123 FILE *stream; 124 char *C; 125 wchar_t wc; 126 short gotsp; 127 ssize_t len; 128 int64_t linect, wordct, charct; 129 struct stat sbuf; 130 int fd; 131 132 linect = wordct = charct = 0; 133 stream = NULL; 134 if (file) { 135 if ((fd = open(file, O_RDONLY, 0)) == -1) { 136 warn("%s", file); 137 rval = 1; 138 return; 139 } 140 } else { 141 fd = STDIN_FILENO; 142 } 143 144 if (!doword && !multibyte) { 145 if (bufsz < MAXBSIZE && 146 (buf = realloc(buf, MAXBSIZE)) == NULL) 147 err(1, NULL); 148 /* 149 * Line counting is split out because it's a lot 150 * faster to get lines than to get words, since 151 * the word count requires some logic. 152 */ 153 if (doline) { 154 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 155 charct += len; 156 for (C = buf; len--; ++C) 157 if (*C == '\n') 158 ++linect; 159 } 160 if (len == -1) { 161 warn("%s", file); 162 rval = 1; 163 } 164 } 165 /* 166 * If all we need is the number of characters and 167 * it's a directory or a regular or linked file, just 168 * stat the puppy. We avoid testing for it not being 169 * a special device in case someone adds a new type 170 * of inode. 171 */ 172 else if (dochar) { 173 mode_t ifmt; 174 175 if (fstat(fd, &sbuf)) { 176 warn("%s", file); 177 rval = 1; 178 } else { 179 ifmt = sbuf.st_mode & S_IFMT; 180 if (ifmt == S_IFREG || ifmt == S_IFLNK 181 || ifmt == S_IFDIR) { 182 charct = sbuf.st_size; 183 } else { 184 while ((len = read(fd, buf, MAXBSIZE)) > 0) 185 charct += len; 186 if (len == -1) { 187 warn("%s", file); 188 rval = 1; 189 } 190 } 191 } 192 } 193 } else { 194 if (file == NULL) 195 stream = stdin; 196 else if ((stream = fdopen(fd, "r")) == NULL) { 197 warn("%s", file); 198 close(fd); 199 rval = 1; 200 return; 201 } 202 203 /* 204 * Do it the hard way. 205 * According to POSIX, a word is a "maximal string of 206 * characters delimited by whitespace." Nothing is said 207 * about a character being printing or non-printing. 208 */ 209 gotsp = 1; 210 while ((len = getline(&buf, &bufsz, stream)) > 0) { 211 if (multibyte) { 212 const char *end = buf + len; 213 for (C = buf; C < end; C += len) { 214 ++charct; 215 len = mbtowc(&wc, C, MB_CUR_MAX); 216 if (len == -1) { 217 mbtowc(NULL, NULL, 218 MB_CUR_MAX); 219 len = 1; 220 wc = L'?'; 221 } else if (len == 0) 222 len = 1; 223 if (iswspace(wc)) { 224 gotsp = 1; 225 if (wc == L'\n') 226 ++linect; 227 } else if (gotsp) { 228 gotsp = 0; 229 ++wordct; 230 } 231 } 232 } else { 233 charct += len; 234 for (C = buf; len--; ++C) { 235 if (isspace((unsigned char)*C)) { 236 gotsp = 1; 237 if (*C == '\n') 238 ++linect; 239 } else if (gotsp) { 240 gotsp = 0; 241 ++wordct; 242 } 243 } 244 } 245 } 246 if (ferror(stream)) { 247 warn("%s", file); 248 rval = 1; 249 } 250 } 251 252 print_counts(linect, wordct, charct, file); 253 254 /* 255 * Don't bother checking doline, doword, or dochar -- speeds 256 * up the common case 257 */ 258 tlinect += linect; 259 twordct += wordct; 260 tcharct += charct; 261 262 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { 263 warn("%s", file); 264 rval = 1; 265 } 266 } 267 268 static void 269 format_and_print(int64_t v) 270 { 271 if (humanchar) { 272 char result[FMT_SCALED_STRSIZE]; 273 274 fmt_scaled((long long)v, result); 275 printf("%7s", result); 276 } else { 277 printf(" %7lld", v); 278 } 279 } 280 281 static void 282 print_counts(int64_t lines, int64_t words, int64_t chars, char *name) 283 { 284 if (doline) 285 format_and_print(lines); 286 if (doword) 287 format_and_print(words); 288 if (dochar) 289 format_and_print(chars); 290 291 if (name) 292 printf(" %s\n", name); 293 else 294 printf("\n"); 295 } 296