xref: /original-bsd/usr.bin/tr/tr.c (revision 6b3572dd)
1 /*
2  * Copyright (c) 1988 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 char copyright[] =
10 "@(#) Copyright (c) 1988 The Regents of the University of California.\n\
11  All rights reserved.\n";
12 #endif /* not lint */
13 
14 #ifndef lint
15 static char sccsid[] = "@(#)tr.c	4.7 (Berkeley) 07/23/90";
16 #endif /* not lint */
17 
18 #include <sys/types.h>
19 #include <stdio.h>
20 #include <ctype.h>
21 
#define	NCHARS	256		/* number of distinct u_char values */
#define	OOBCH	257		/* out of band: matches no character */

/*
 * Scanning state for one string argument.  main() points str at the
 * argument and seeds state/lastch; next() then advances through it,
 * leaving each character it produces in lastch.
 */
typedef struct {
	char *str;		/* next unread byte of the argument */
	int lastch;		/* character most recently produced */
	int endrange;		/* last character of the range in progress */
	enum {
		NORM,		/* reading ordinary characters */
		INRANGE,	/* stepping through an "a-z" range */
		EOS		/* argument exhausted */
	} state;
} STR;
30 
31 main(argc, argv)
32 	int argc;
33 	char **argv;
34 {
35 	extern int optind;
36 	STR s1, s2;
37 	register int ch, indx, lastch;
38 	int cflag, dflag, sflag;
39 	u_char *tp, tab[NCHARS], squeeze[NCHARS];
40 
41 	cflag = dflag = sflag = 0;
42 	while ((ch = getopt(argc, argv, "cds")) != EOF)
43 		switch((char)ch) {
44 		case 'c':
45 			cflag = 1;
46 			break;
47 		case 'd':
48 			dflag = 1;
49 			break;
50 		case 's':
51 			sflag = 1;
52 			break;
53 		case '?':
54 		default:
55 			fprintf(stderr,
56 			    "usage: tr [-cds] [string1 [string2]]\n");
57 			exit(1);
58 		}
59 	argc -= optind;
60 	argv += optind;
61 
62 	/*
63 	 * the original tr was amazingly tolerant of the command line.
64 	 * Neither -c or -s have any effect unless there are two strings.
65 	 * Extra arguments are silently ignored.  Bag this noise, they
66 	 * should all be errors.
67 	 */
68 	if (argc < 2 && !dflag) {
69 		while ((ch = getchar()) != EOF)
70 			putchar(ch);
71 		exit(0);
72 	}
73 
74 	bzero(tab, NCHARS);
75 	if (sflag) {
76 		s1.str = argv[1];
77 		s1.state = NORM;
78 		s1.lastch = OOBCH;
79 		while (next(&s1))
80 			squeeze[s1.lastch] = 1;
81 	}
82 	if (dflag) {
83 		s1.str = argv[0];
84 		s1.state = NORM;
85 		s1.lastch = OOBCH;
86 		while (next(&s1))
87 			tab[s1.lastch] = 1;
88 		if (cflag)
89 			for (tp = tab, indx = 0; indx < NCHARS; ++tp, ++indx)
90 				*tp = !*tp;
91 		if (sflag)
92 			for (lastch = OOBCH; (ch = getchar()) != EOF;) {
93 				if (tab[ch] || (squeeze[ch] && lastch == ch))
94 					continue;
95 				lastch = ch;
96 				putchar(ch);
97 			}
98 		else
99 			while ((ch = getchar()) != EOF)
100 				if (!tab[ch])
101 					putchar(ch);
102 	} else {
103 		s1.str = argv[0];
104 		s2.str = argv[1];
105 		s1.state = s2.state = NORM;
106 		s1.lastch = s2.lastch = OOBCH;
107 		if (cflag) {
108 			/*
109 			 * if cflag is set, tr just pretends it only got one
110 			 * character in string2.  As reasonable as anything
111 			 * else.  Should really be an error.
112 			 */
113 			while (next(&s2));
114 			lastch = s2.lastch;
115 			for (tp = tab, indx = 0; indx < NCHARS; ++tp, ++indx)
116 				*tp = lastch;
117 			while (next(&s1))
118 				tab[s1.lastch] = s1.lastch;
119 		} else {
120 			for (tp = tab, indx = 0; indx < NCHARS; ++tp, ++indx)
121 				*tp = indx;
122 			while (next(&s1)) {
123 				(void)next(&s2);
124 				tab[s1.lastch] = s2.lastch;
125 			}
126 		}
127 		if (sflag)
128 			for (lastch = OOBCH; (ch = getchar()) != EOF;) {
129 				ch = tab[ch];
130 				if (squeeze[ch] && lastch == ch)
131 					continue;
132 				lastch = ch;
133 				putchar(ch);
134 			}
135 		else
136 			while ((ch = getchar()) != EOF)
137 				putchar((int)tab[ch]);
138 	}
139 	exit(0);
140 }
141 
142 next(s)
143 	register STR *s;
144 {
145 	register int ch;
146 
147 	if (s->state == EOS)
148 		return(0);
149 	if (s->state == INRANGE) {
150 		if (++s->lastch == s->endrange)
151 			s->state = NORM;
152 		return(1);
153 	}
154 	if (!(ch = *s->str++)) {
155 		s->state = EOS;
156 		return(0);
157 	}
158 	if (ch == '\\') {			/* \### */
159 		s->lastch = tran(s);
160 		return(1);
161 	}
162 	if (ch == '-') {			/* ranges */
163 		if (s->lastch == OOBCH)		/* "-a" */
164 			goto fail2;
165 		if (!(ch = *s->str++))		/* "a-" */
166 			goto fail1;
167 		if (ch == '\\')			/* \### */
168 			ch = tran(s);
169 		if (s->lastch > ch) { 		/* "z-a" */
170 fail1:			--s->str;
171 fail2:			s->lastch = '-';
172 			return(1);
173 		}
174 		if (s->lastch == ch)		/* "a-a" */
175 			return(next(s));
176 		s->state = INRANGE;		/* "a-z" */
177 		s->endrange = ch;
178 		return(1);
179 	}
180 	s->lastch = ch;
181 	return(1);
182 }
183 
184 /*
185  * Translate \-escapes.  Up to 3 octal digits => char; no digits => literal.
186  * Unadorned backslash "\" is like \000.
187  */
188 tran(s)
189 	register STR *s;
190 {
191 	register int ch, cnt = 0, val = 0;
192 
193 	for (;;) {
194 		ch = *s->str++;
195 		if (!isascii(ch) || !isdigit(ch) || ++cnt > 3)
196 			break;
197 		val = val * 8 + ch - '0';
198 	}
199 	if (cnt || ch == 0)
200 		s->str--;
201 	return (cnt ? val : ch);
202 }
203