1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2012 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 /* 23 * David Korn 24 * Glenn Fowler 25 * AT&T Bell Laboratories 26 * 27 * cat 28 */ 29 30 #include <cmd.h> 31 #include <fcntl.h> 32 33 static const char usage[] = 34 "[-?\n@(#)$Id: cat (AT&T Research) 2012-05-31 $\n]" 35 USAGE_LICENSE 36 "[+NAME?cat - concatenate files]" 37 "[+DESCRIPTION?\bcat\b copies each \afile\a in sequence to the standard" 38 " output. If no \afile\a is given, or if the \afile\a is \b-\b," 39 " \bcat\b copies from standard input starting at the current location.]" 40 41 "[b:number-nonblank?Number lines as with \b-n\b but omit line numbers from" 42 " blank lines.]" 43 "[d:dos-input?Input files are opened in \atext\amode which removes carriage" 44 " returns in front of new-lines on some systems.]" 45 "[e?Equivalent to \b-vE\b.]" 46 "[n:number?Causes a line number to be inserted at the beginning of each line.]" 47 "[s?Equivalent to \b-S\b for \aatt\a universe and \b-B\b otherwise.]" 48 "[t?Equivalent to \b-vT\b.]" 49 "[u:unbuffer?The output is not delayed by buffering.]" 50 "[v:show-nonprinting|print-chars?Print characters as follows: space and " 51 "printable characters as themselves; control characters as \b^\b " 52 "followed by a letter of the alphabet; and characters with the high bit " 53 "set as the lower 7 bit character prefixed by \bM^\b for 7 bit " 54 "non-printable characters and \bM-\b for all other characters. If the 7 " 55 "bit character encoding is not ASCII then the characters are converted " 56 "to ASCII to determine \ahigh bit set\a, and if set it is cleared and " 57 "converted back to the native encoding. Multibyte characters in the " 58 "current locale are treated as printable characters.]" 59 "[A:show-all?Equivalent to \b-vET\b.]" 60 "[B:squeeze-blank?Multiple adjacent new-line characters are replace by one" 61 " new-line.]" 62 "[D:dos-output?Output files are opened in \atext\amode which inserts carriage" 63 " returns in front of new-lines on some systems.]" 64 "[E:show-ends?Causes a \b$\b to be inserted before each new-line.]" 65 "[R:regress?Regression test defaults: \b-v\b buffer size 4.]" 66 "[S:silent?\bcat\b is silent about non-existent files.]" 67 "[T:show-blank?Causes tabs to be copied as \b^I\b and formfeeds as \b^L\b.]" 68 69 "\n" 70 "\n[file ...]\n" 71 "\n" 72 73 "[+SEE ALSO?\bcp\b(1), \bgetconf\b(1), \bpr\b(1)]" 74 ; 75 76 #define RUBOUT 0177 77 78 /* control flags */ 79 #define B_FLAG (1<<0) 80 #define E_FLAG (1<<1) 81 #define F_FLAG (1<<2) 82 #define N_FLAG (1<<3) 83 #define S_FLAG (1<<4) 84 #define T_FLAG (1<<5) 85 #define U_FLAG (1<<6) 86 #define V_FLAG (1<<7) 87 #define D_FLAG (1<<8) 88 #define d_FLAG (1<<9) 89 90 /* character types */ 91 #define T_ERROR 1 92 #define T_EOF 2 93 #define T_ENDBUF 3 94 #define T_NEWLINE 4 95 #define T_CONTROL 5 96 #define T_EIGHTBIT 6 97 #define T_CNTL8BIT 7 98 99 #define printof(c) ((c)^0100) 100 101 typedef void* (*Reserve_f)(Sfio_t*, ssize_t, int); 102 103 #ifndef sfvalue 104 #define sfvalue(f) ((f)->_val) 105 #endif 106 107 static void* 108 regress(Sfio_t* sp, ssize_t n, int f) 109 { 110 void* r; 111 112 if (!(r = sfreserve(sp, 4, f))) 113 r = sfreserve(sp, n, f); 114 else if (sfvalue(sp) > 4) 115 sfvalue(sp) = 4; 116 return r; 117 } 118 119 /* 120 * called for any special output processing 121 */ 122 123 static int 124 vcat(register char* states, Sfio_t* ip, Sfio_t* op, Reserve_f reserve, int flags) 125 { 126 register unsigned char* cp; 127 register unsigned char* pp; 128 unsigned char* cur; 129 unsigned char* end; 130 unsigned char* buf; 131 unsigned char* nxt; 132 register int n; 133 register int line; 134 register int raw; 135 int last; 136 int c; 137 int m; 138 int any; 139 int header; 140 141 unsigned char meta[3]; 142 unsigned char tmp[32]; 143 144 meta[0] = 'M'; 145 last = -1; 146 *(cp = buf = end = tmp) = 0; 147 any = 0; 148 header = flags & (B_FLAG|N_FLAG); 149 line = 1; 150 states[0] = T_ENDBUF; 151 raw = !mbwide(); 152 for (;;) 153 { 154 cur = cp; 155 if (raw) 156 while (!(n = states[*cp++])); 157 else 158 for (;;) 159 { 160 while (!(n = states[*cp++])); 161 if (n < T_CONTROL) 162 break; 163 if ((m = mbsize(pp = cp - 1)) > 1) 164 cp += m - 1; 165 else 166 { 167 if (m <= 0) 168 { 169 if (cur == pp) 170 { 171 if (last > 0) 172 { 173 *end = last; 174 last = -1; 175 c = end - pp + 1; 176 if ((m = mbsize(pp)) == c) 177 { 178 any = 1; 179 if (header) 180 { 181 header = 0; 182 sfprintf(op, "%6d\t", line); 183 } 184 sfwrite(op, cur, m); 185 *(cp = cur = end) = 0; 186 } 187 else 188 { 189 memcpy(tmp, pp, c); 190 if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0))) 191 { 192 states[0] = sfvalue(ip) ? T_ERROR : T_EOF; 193 *(cp = end = tmp + sizeof(tmp) - 1) = 0; 194 last = -1; 195 } 196 else if ((n = sfvalue(ip)) <= 0) 197 { 198 states[0] = n ? T_ERROR : T_EOF; 199 *(cp = end = tmp + sizeof(tmp) - 1) = 0; 200 last = -1; 201 } 202 else 203 { 204 cp = buf = nxt; 205 end = buf + n - 1; 206 last = *end; 207 *end = 0; 208 } 209 mb: 210 if ((n = end - cp + 1) >= (sizeof(tmp) - c)) 211 n = sizeof(tmp) - c - 1; 212 memcpy(tmp + c, cp, n); 213 if ((m = mbsize(tmp)) >= c) 214 { 215 any = 1; 216 if (header) 217 { 218 header = 0; 219 sfprintf(op, "%6d\t", line); 220 } 221 sfwrite(op, tmp, m); 222 cur = cp += m - c; 223 } 224 } 225 continue; 226 } 227 } 228 else 229 { 230 cp = pp + 1; 231 n = 0; 232 } 233 } 234 break; 235 } 236 } 237 c = *--cp; 238 if ((m = cp - cur) || n >= T_CONTROL) 239 { 240 flush: 241 any = 1; 242 if (header) 243 { 244 header = 0; 245 sfprintf(op, "%6d\t", line); 246 } 247 if (m) 248 sfwrite(op, cur, m); 249 } 250 special: 251 switch (n) 252 { 253 case T_ERROR: 254 if (cp < end) 255 { 256 n = T_CONTROL; 257 goto flush; 258 } 259 return -1; 260 case T_EOF: 261 if (cp < end) 262 { 263 n = T_CONTROL; 264 goto flush; 265 } 266 return 0; 267 case T_ENDBUF: 268 if (cp < end) 269 { 270 n = T_CONTROL; 271 goto flush; 272 } 273 c = last; 274 if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0))) 275 { 276 *(cp = end = tmp + sizeof(tmp) - 1) = 0; 277 states[0] = (m = sfvalue(ip)) ? T_ERROR : T_EOF; 278 last = -1; 279 } 280 else if ((m = sfvalue(ip)) <= 0) 281 { 282 *(cp = end = tmp + sizeof(tmp) - 1) = 0; 283 states[0] = m ? T_ERROR : T_EOF; 284 last = -1; 285 } 286 else 287 { 288 buf = nxt; 289 end = buf + m - 1; 290 last = *end; 291 *end = 0; 292 cp = buf; 293 } 294 if (c >= 0) 295 { 296 if (!(n = states[c])) 297 { 298 *(cur = tmp) = c; 299 m = 1; 300 goto flush; 301 } 302 if (raw || n < T_CONTROL) 303 { 304 cp--; 305 goto special; 306 } 307 tmp[0] = c; 308 c = 1; 309 goto mb; 310 } 311 break; 312 case T_CONTROL: 313 do 314 { 315 sfputc(op, '^'); 316 sfputc(op, printof(c)); 317 } while (states[c = *++cp] == T_CONTROL); 318 break; 319 case T_CNTL8BIT: 320 meta[1] = '^'; 321 do 322 { 323 n = c & ~0200; 324 meta[2] = printof(n); 325 sfwrite(op, (char*)meta, 3); 326 } while (states[c = *++cp] == T_CNTL8BIT && raw); 327 break; 328 case T_EIGHTBIT: 329 meta[1] = '-'; 330 do 331 { 332 meta[2] = c & ~0200; 333 sfwrite(op, (char*)meta, 3); 334 } while (states[c = *++cp] == T_EIGHTBIT && raw); 335 break; 336 case T_NEWLINE: 337 if (header && !(flags & B_FLAG)) 338 sfprintf(op, "%6d\t", line); 339 if (flags & E_FLAG) 340 sfputc(op, '$'); 341 sfputc(op, '\n'); 342 if (!header || !(flags & B_FLAG)) 343 line++; 344 header = !(flags & S_FLAG); 345 for (;;) 346 { 347 if ((n = states[*++cp]) == T_ENDBUF) 348 { 349 if (cp < end || last != '\n') 350 break; 351 if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0))) 352 { 353 states[0] = sfvalue(ip) ? T_ERROR : T_EOF; 354 cp = end = tmp; 355 *cp-- = 0; 356 last = -1; 357 } 358 else if ((n = sfvalue(ip)) <= 0) 359 { 360 states[0] = n ? T_ERROR : T_EOF; 361 cp = end = tmp; 362 *cp-- = 0; 363 last = -1; 364 } 365 else 366 { 367 buf = nxt; 368 end = buf + n - 1; 369 last = *end; 370 *end = 0; 371 cp = buf - 1; 372 } 373 } 374 else if (n != T_NEWLINE) 375 break; 376 if (!(flags & S_FLAG) || any || header) 377 { 378 any = 0; 379 header = 0; 380 if ((flags & (B_FLAG|N_FLAG)) == N_FLAG) 381 sfprintf(op, "%6d\t", line); 382 if (flags & E_FLAG) 383 sfputc(op, '$'); 384 sfputc(op, '\n'); 385 } 386 if (!(flags & B_FLAG)) 387 line++; 388 } 389 header = flags & (B_FLAG|N_FLAG); 390 break; 391 } 392 } 393 } 394 395 int 396 b_cat(int argc, char** argv, Shbltin_t* context) 397 { 398 register int n; 399 register int flags = 0; 400 register char* cp; 401 register Sfio_t* fp; 402 char* mode; 403 Reserve_f reserve = sfreserve; 404 int att; 405 int dovcat = 0; 406 char states[UCHAR_MAX+1]; 407 408 cmdinit(argc, argv, context, ERROR_CATALOG, 0); 409 att = !strcmp(astconf("UNIVERSE", NiL, NiL), "att"); 410 mode = "r"; 411 for (;;) 412 { 413 n = 0; 414 switch (optget(argv, usage)) 415 { 416 case 'A': 417 n = T_FLAG|E_FLAG|V_FLAG; 418 break; 419 case 'B': 420 n = S_FLAG; 421 break; 422 case 'b': 423 n = B_FLAG; 424 break; 425 case 'd': 426 mode = opt_info.num ? "rt" : "r"; 427 continue; 428 case 'D': 429 n = d_FLAG; 430 break; 431 case 'E': 432 n = E_FLAG; 433 break; 434 case 'e': 435 n = E_FLAG|V_FLAG; 436 break; 437 case 'n': 438 n = N_FLAG; 439 break; 440 case 'R': 441 reserve = opt_info.num ? regress : sfreserve; 442 continue; 443 case 's': 444 n = att ? F_FLAG : S_FLAG; 445 break; 446 case 'S': 447 n = F_FLAG; 448 break; 449 case 'T': 450 n = T_FLAG; 451 break; 452 case 't': 453 n = T_FLAG|V_FLAG; 454 break; 455 case 'u': 456 n = U_FLAG; 457 break; 458 case 'v': 459 n = V_FLAG; 460 break; 461 case ':': 462 error(2, "%s", opt_info.arg); 463 break; 464 case '?': 465 error(ERROR_usage(2), "%s", opt_info.arg); 466 break; 467 } 468 if (!n) 469 break; 470 if (opt_info.num) 471 flags |= n; 472 else 473 flags &= ~n; 474 } 475 argv += opt_info.index; 476 if (error_info.errors) 477 error(ERROR_usage(2), "%s", optusage(NiL)); 478 memset(states, 0, sizeof(states)); 479 if (flags&V_FLAG) 480 { 481 memset(states, T_CONTROL, ' '); 482 states[RUBOUT] = T_CONTROL; 483 memset(states+0200, T_EIGHTBIT, 0200); 484 memset(states+0200, T_CNTL8BIT, ' '); 485 states[RUBOUT|0200] = T_CNTL8BIT; 486 states['\n'] = 0; 487 } 488 if (flags&T_FLAG) 489 states['\t'] = T_CONTROL; 490 states[0] = T_ENDBUF; 491 if (att) 492 { 493 if (flags&V_FLAG) 494 { 495 states['\n'|0200] = T_EIGHTBIT; 496 if (!(flags&T_FLAG)) 497 { 498 states['\t'] = states['\f'] = 0; 499 states['\t'|0200] = states['\f'|0200] = T_EIGHTBIT; 500 } 501 } 502 } 503 else if (flags) 504 { 505 if (!(flags&T_FLAG)) 506 states['\t'] = 0; 507 } 508 if (flags&(V_FLAG|T_FLAG|N_FLAG|E_FLAG|B_FLAG|S_FLAG)) 509 { 510 states['\n'] = T_NEWLINE; 511 dovcat = 1; 512 } 513 if (flags&d_FLAG) 514 sfopen(sfstdout, NiL, "wt"); 515 if (cp = *argv) 516 argv++; 517 do 518 { 519 if (!cp || streq(cp, "-")) 520 { 521 fp = sfstdin; 522 if (flags&D_FLAG) 523 sfopen(fp, NiL, mode); 524 } 525 else if (!(fp = sfopen(NiL, cp, mode))) 526 { 527 if (!(flags&F_FLAG)) 528 error(ERROR_system(0), "%s: cannot open", cp); 529 error_info.errors = 1; 530 continue; 531 } 532 if (flags&U_FLAG) 533 sfsetbuf(fp, (void*)fp, -1); 534 if (dovcat) 535 n = vcat(states, fp, sfstdout, reserve, flags); 536 else if (sfmove(fp, sfstdout, SF_UNBOUND, -1) >= 0 && sfeof(fp)) 537 n = 0; 538 else 539 n = -1; 540 if (fp != sfstdin) 541 sfclose(fp); 542 if (n < 0 && !ERROR_PIPE(errno) && errno != EINTR) 543 { 544 if (cp) 545 error(ERROR_system(0), "%s: read error", cp); 546 else 547 error(ERROR_system(0), "read error"); 548 } 549 if (sferror(sfstdout)) 550 break; 551 } while (cp = *argv++); 552 if (sfsync(sfstdout)) 553 error(ERROR_system(0), "write error"); 554 if (flags&d_FLAG) 555 sfopen(sfstdout, NiL, "w"); 556 return error_info.errors; 557 } 558