1 /* 2 * Copyright (c) 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#) Copyright (c) 1988, 1993 The Regents of the University of California. All rights reserved. 30 * @(#)tr.c 8.2 (Berkeley) 5/4/95 31 * $FreeBSD: head/usr.bin/tr/tr.c 245767 2013-01-22 05:39:34Z andrew $ 32 */ 33 34 #include <sys/types.h> 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <stdint.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 #include <wchar.h> 46 #include <wctype.h> 47 48 #include "cmap.h" 49 #include "cset.h" 50 #include "extern.h" 51 52 static STR s1 = { STRING1, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 53 static STR s2 = { STRING2, NORMAL, 0, OOBCH, 0, { 0, OOBCH }, NULL, NULL }; 54 55 static struct cset *setup(char *, STR *, int, int); 56 static void usage(void); 57 58 int 59 main(int argc, char **argv) 60 { 61 static int carray[NCHARS_SB]; 62 struct cmap *map; 63 struct cset *delete, *squeeze; 64 int n, *p; 65 int Cflag, cflag, dflag, sflag, isstring2; 66 wint_t ch, cnt, lastch; 67 68 (void)setlocale(LC_ALL, ""); 69 70 Cflag = cflag = dflag = sflag = 0; 71 while ((ch = getopt(argc, argv, "Ccdsu")) != -1) 72 switch((char)ch) { 73 case 'C': 74 Cflag = 1; 75 cflag = 0; 76 break; 77 case 'c': 78 cflag = 1; 79 Cflag = 0; 80 break; 81 case 'd': 82 dflag = 1; 83 break; 84 case 's': 85 sflag = 1; 86 break; 87 case 'u': 88 setbuf(stdout, NULL); 89 break; 90 case '?': 91 default: 92 usage(); 93 } 94 argc -= optind; 95 argv += optind; 96 97 switch(argc) { 98 case 0: 99 default: 100 usage(); 101 /* NOTREACHED */ 102 case 1: 103 isstring2 = 0; 104 break; 105 case 2: 106 isstring2 = 1; 107 break; 108 } 109 110 /* 111 * tr -ds [-Cc] string1 string2 112 * Delete all characters (or complemented characters) in string1. 113 * Squeeze all characters in string2. 114 */ 115 if (dflag && sflag) { 116 if (!isstring2) 117 usage(); 118 119 delete = setup(argv[0], &s1, cflag, Cflag); 120 squeeze = setup(argv[1], &s2, 0, 0); 121 122 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 123 if (!cset_in(delete, ch) && 124 (lastch != ch || !cset_in(squeeze, ch))) { 125 lastch = ch; 126 (void)putwchar(ch); 127 } 128 if (ferror(stdin)) 129 err(1, NULL); 130 exit(0); 131 } 132 133 /* 134 * tr -d [-Cc] string1 135 * Delete all characters (or complemented characters) in string1. 136 */ 137 if (dflag) { 138 if (isstring2) 139 usage(); 140 141 delete = setup(argv[0], &s1, cflag, Cflag); 142 143 while ((ch = getwchar()) != WEOF) 144 if (!cset_in(delete, ch)) 145 (void)putwchar(ch); 146 if (ferror(stdin)) 147 err(1, NULL); 148 exit(0); 149 } 150 151 /* 152 * tr -s [-Cc] string1 153 * Squeeze all characters (or complemented characters) in string1. 154 */ 155 if (sflag && !isstring2) { 156 squeeze = setup(argv[0], &s1, cflag, Cflag); 157 158 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) 159 if (lastch != ch || !cset_in(squeeze, ch)) { 160 lastch = ch; 161 (void)putwchar(ch); 162 } 163 if (ferror(stdin)) 164 err(1, NULL); 165 exit(0); 166 } 167 168 /* 169 * tr [-Ccs] string1 string2 170 * Replace all characters (or complemented characters) in string1 with 171 * the character in the same position in string2. If the -s option is 172 * specified, squeeze all the characters in string2. 173 */ 174 if (!isstring2) 175 usage(); 176 177 map = cmap_alloc(); 178 if (map == NULL) 179 err(1, NULL); 180 squeeze = cset_alloc(); 181 if (squeeze == NULL) 182 err(1, NULL); 183 184 s1.str = argv[0]; 185 186 if (Cflag || cflag) { 187 cmap_default(map, OOBCH); 188 if ((s2.str = strdup(argv[1])) == NULL) 189 errx(1, "strdup(argv[1])"); 190 } else 191 s2.str = argv[1]; 192 193 if (!next(&s2)) 194 errx(1, "empty string2"); 195 196 /* 197 * For -s result will contain only those characters defined 198 * as the second characters in each of the toupper or tolower 199 * pairs. 200 */ 201 202 /* If string2 runs out of characters, use the last one specified. */ 203 while (next(&s1)) { 204 again: 205 if (s1.state == CCLASS_LOWER && 206 s2.state == CCLASS_UPPER && 207 s1.cnt == 1 && s2.cnt == 1) { 208 do { 209 ch = towupper(s1.lastch); 210 cmap_add(map, s1.lastch, ch); 211 if (sflag && iswupper(ch)) 212 cset_add(squeeze, ch); 213 if (!next(&s1)) 214 goto endloop; 215 } while (s1.state == CCLASS_LOWER && s1.cnt > 1); 216 /* skip upper set */ 217 do { 218 if (!next(&s2)) 219 break; 220 } while (s2.state == CCLASS_UPPER && s2.cnt > 1); 221 goto again; 222 } else if (s1.state == CCLASS_UPPER && 223 s2.state == CCLASS_LOWER && 224 s1.cnt == 1 && s2.cnt == 1) { 225 do { 226 ch = towlower(s1.lastch); 227 cmap_add(map, s1.lastch, ch); 228 if (sflag && iswlower(ch)) 229 cset_add(squeeze, ch); 230 if (!next(&s1)) 231 goto endloop; 232 } while (s1.state == CCLASS_UPPER && s1.cnt > 1); 233 /* skip lower set */ 234 do { 235 if (!next(&s2)) 236 break; 237 } while (s2.state == CCLASS_LOWER && s2.cnt > 1); 238 goto again; 239 } else { 240 cmap_add(map, s1.lastch, s2.lastch); 241 if (sflag) 242 cset_add(squeeze, s2.lastch); 243 } 244 (void)next(&s2); 245 } 246 endloop: 247 if (cflag || (Cflag && MB_CUR_MAX > 1)) { 248 /* 249 * This is somewhat tricky: since the character set is 250 * potentially huge, we need to avoid allocating a map 251 * entry for every character. Our strategy is to set the 252 * default mapping to the last character of string #2 253 * (= the one that gets automatically repeated), then to 254 * add back identity mappings for characters that should 255 * remain unchanged. We don't waste space on identity mappings 256 * for non-characters with the -C option; those are simulated 257 * in the I/O loop. 258 */ 259 s2.str = argv[1]; 260 s2.state = NORMAL; 261 for (cnt = 0; cnt < WINT_MAX; cnt++) { 262 if (Cflag && !iswrune(cnt)) 263 continue; 264 if (cmap_lookup(map, cnt) == OOBCH) { 265 if (next(&s2)) 266 cmap_add(map, cnt, s2.lastch); 267 if (sflag) 268 cset_add(squeeze, s2.lastch); 269 } else 270 cmap_add(map, cnt, cnt); 271 if ((s2.state == EOS || s2.state == INFINITE) && 272 cnt >= cmap_max(map)) 273 break; 274 } 275 cmap_default(map, s2.lastch); 276 } else if (Cflag) { 277 for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) { 278 if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt)) 279 *p++ = cnt; 280 else 281 cmap_add(map, cnt, cnt); 282 } 283 n = p - carray; 284 if (Cflag && n > 1) 285 (void)mergesort(carray, n, sizeof(*carray), charcoll); 286 287 s2.str = argv[1]; 288 s2.state = NORMAL; 289 for (cnt = 0; cnt < n; cnt++) { 290 (void)next(&s2); 291 cmap_add(map, carray[cnt], s2.lastch); 292 /* 293 * Chars taken from s2 can be different this time 294 * due to lack of complex upper/lower processing, 295 * so fill string2 again to not miss some. 296 */ 297 if (sflag) 298 cset_add(squeeze, s2.lastch); 299 } 300 } 301 302 cset_cache(squeeze); 303 cmap_cache(map); 304 305 if (sflag) 306 for (lastch = OOBCH; (ch = getwchar()) != WEOF;) { 307 if (!Cflag || iswrune(ch)) 308 ch = cmap_lookup(map, ch); 309 if (lastch != ch || !cset_in(squeeze, ch)) { 310 lastch = ch; 311 (void)putwchar(ch); 312 } 313 } 314 else 315 while ((ch = getwchar()) != WEOF) { 316 if (!Cflag || iswrune(ch)) 317 ch = cmap_lookup(map, ch); 318 (void)putwchar(ch); 319 } 320 if (ferror(stdin)) 321 err(1, NULL); 322 exit (0); 323 } 324 325 static struct cset * 326 setup(char *arg, STR *str, int cflag, int Cflag) 327 { 328 struct cset *cs; 329 330 cs = cset_alloc(); 331 if (cs == NULL) 332 err(1, NULL); 333 str->str = arg; 334 while (next(str)) 335 cset_add(cs, str->lastch); 336 if (Cflag) 337 cset_addclass(cs, wctype("rune"), true); 338 if (cflag || Cflag) 339 cset_invert(cs); 340 cset_cache(cs); 341 return (cs); 342 } 343 344 int 345 charcoll(const void *a, const void *b) 346 { 347 static char sa[2], sb[2]; 348 349 sa[0] = *(const int *)a; 350 sb[0] = *(const int *)b; 351 return (strcoll(sa, sb)); 352 } 353 354 static void 355 usage(void) 356 { 357 (void)fprintf(stderr, "%s\n%s\n%s\n%s\n", 358 "usage: tr [-Ccsu] string1 string2", 359 " tr [-Ccu] -d string1", 360 " tr [-Ccu] -s string1", 361 " tr [-Ccu] -ds string1 string2"); 362 exit(1); 363 } 364