1 /*
2  * entab.c - adds/removes tabs from text files
3  */
4 
5 #include <errno.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <stdarg.h>
10 #include <unistd.h>
11 
12 #if defined(WIN32) || defined(__CYGWIN__)
13 #define PG_BINARY_R "rb"
14 #else
15 #define PG_BINARY_R "r"
16 #endif
17 
18 #define NUL		'\0'
19 
20 #ifndef TRUE
21 #define TRUE	1
22 #endif
23 #ifndef FALSE
24 #define FALSE	0
25 #endif
26 
27 extern char *optarg;
28 extern int	optind;
29 
30 
/*
 * output_accumulated_spaces - flush pending blanks into the output buffer.
 *
 * Writes *prv_spaces space characters at *dst, advances *dst past them and
 * leaves *prv_spaces at zero.  A non-positive count writes nothing and is
 * left unchanged.  The caller must guarantee that the destination has room
 * for the blanks; no bounds checking is done here.
 */
static void
output_accumulated_spaces(int *prv_spaces, char **dst)
{
	char	   *out = *dst;
	int			pending = *prv_spaces;

	while (pending > 0)
	{
		*out++ = ' ';
		pending--;
	}

	*dst = out;
	*prv_spaces = pending;
}
37 
38 
/*
 * trim_trailing_whitespace - drop blanks and tabs at the end of the output.
 *
 * Moves *dst backwards over any run of ' ' or '\t' characters that
 * immediately precedes it, never going below out_line (the start of the
 * output buffer).  Any spaces still pending in *prv_spaces are discarded
 * by resetting the counter to zero.
 */
static void
trim_trailing_whitespace(int *prv_spaces, char **dst, char *out_line)
{
	char	   *end = *dst;

	while (end > out_line)
	{
		char		last = end[-1];

		if (last != ' ' && last != '\t')
			break;
		end--;
	}

	*dst = end;
	*prv_spaces = 0;
}
47 
48 
49 int
main(int argc,char ** argv)50 main(int argc, char **argv)
51 {
52 	int			tab_size = 8,
53 				min_spaces = 2,
54 				only_comment_periods = FALSE,
55 				protect_quotes = FALSE,
56 				protect_leading_whitespace = FALSE,
57 				del_tabs = FALSE,
58 				clip_lines = FALSE,
59 				in_comment = FALSE,
60 				was_period = FALSE,
61 				prv_spaces,
62 				col_in_tab,
63 				escaped,
64 				nxt_spaces,
65 				in_leading_whitespace;
66 	char		in_line[BUFSIZ],
67 				out_line[BUFSIZ],
68 			   *src,
69 			   *dst,
70 				quote_char,
71 			   *cp;
72 	int			ch;
73 	FILE	   *in_file;
74 
75 	if ((cp = strrchr(argv[0], '/')) != NULL)
76 		++cp;
77 	else
78 		cp = argv[0];
79 	if (strcmp(cp, "detab") == 0)
80 		del_tabs = 1;
81 
82 	while ((ch = getopt(argc, argv, "cdhlmqs:t:")) != -1)
83 		switch (ch)
84 		{
85 			case 'c':
86 				clip_lines = TRUE;
87 				break;
88 			case 'd':
89 				del_tabs = TRUE;
90 				break;
91 			case 'l':
92 				protect_leading_whitespace = TRUE;
93 				break;
94 			case 'm':
95 				/* only process text followed by periods in C comments */
96 				only_comment_periods = TRUE;
97 				break;
98 			case 'q':
99 				protect_quotes = TRUE;
100 				break;
101 			case 's':
102 				min_spaces = atoi(optarg);
103 				break;
104 			case 't':
105 				tab_size = atoi(optarg);
106 				break;
107 			case 'h':
108 			case '?':
109 				fprintf(stderr, "USAGE: %s [ -cdqst ] [file ...]\n\
110 	-c (clip trailing whitespace)\n\
111 	-d (delete tabs)\n\
112 	-l (protect leading whitespace)\n\
113 	-m (only C comment periods)\n\
114 	-q (protect quotes)\n\
115 	-s minimum_spaces\n\
116 	-t tab_width\n",
117 						cp);
118 				exit(0);
119 		}
120 
121 	argv += optind;
122 	argc -= optind;
123 
124 	/* process arguments */
125 	do
126 	{
127 		if (argc < 1)
128 			in_file = stdin;
129 		else
130 		{
131 			if ((in_file = fopen(*argv, PG_BINARY_R)) == NULL)
132 			{
133 				fprintf(stderr, "Cannot open file %s: %s\n", argv[0], strerror(errno));
134 				exit(1);
135 			}
136 			argv++;
137 		}
138 
139 		escaped = FALSE;
140 
141 		/* process lines */
142 		while (fgets(in_line, sizeof(in_line), in_file) != NULL)
143 		{
144 			col_in_tab = 0;
145 			prv_spaces = 0;
146 			src = in_line;		/* points to current processed char */
147 			dst = out_line;		/* points to next unallocated char */
148 			if (escaped == FALSE)
149 				quote_char = ' ';
150 			escaped = FALSE;
151 			in_leading_whitespace = TRUE;
152 
153 			/* process line */
154 			while (*src != NUL)
155 			{
156 				col_in_tab++;
157 
158 				/* look backward so we handle slash-star-slash properly */
159 				if (!in_comment && src > in_line &&
160 					*(src - 1) == '/' && *src == '*')
161 					in_comment = TRUE;
162 				else if (in_comment && *src == '*' && *(src + 1) == '/')
163 					in_comment = FALSE;
164 
165 				/* Is this a potential space/tab replacement? */
166 				if ((!only_comment_periods || (in_comment && was_period)) &&
167 					(!protect_leading_whitespace || !in_leading_whitespace) &&
168 					quote_char == ' ' && (*src == ' ' || *src == '\t'))
169 				{
170 					if (*src == '\t')
171 					{
172 						prv_spaces += tab_size - col_in_tab + 1;
173 						col_in_tab = tab_size;
174 					}
175 					else
176 						prv_spaces++;
177 
178 					/* Are we at a tab stop? */
179 					if (col_in_tab == tab_size)
180 					{
181 						/*
182 						 * Is the next character going to be a tab?  We do tab
183 						 * replacement in the current spot if the next char is
184 						 * going to be a tab and ignore min_spaces.
185 						 */
186 						nxt_spaces = 0;
187 						while (1)
188 						{
189 							/* Have we reached non-whitespace? */
190 							if (*(src + nxt_spaces + 1) == NUL ||
191 								(*(src + nxt_spaces + 1) != ' ' &&
192 								 *(src + nxt_spaces + 1) != '\t'))
193 								break;
194 							/* count spaces */
195 							if (*(src + nxt_spaces + 1) == ' ')
196 								++nxt_spaces;
197 							/* Have we found a forward tab? */
198 							if (*(src + nxt_spaces + 1) == '\t' ||
199 								nxt_spaces == tab_size)
200 							{
201 								nxt_spaces = tab_size;
202 								break;
203 							}
204 						}
205 						/* Do tab replacment for spaces? */
206 						if ((prv_spaces >= min_spaces ||
207 							 nxt_spaces == tab_size) &&
208 							del_tabs == FALSE)
209 						{
210 							*(dst++) = '\t';
211 							prv_spaces = 0;
212 						}
213 						else
214 							output_accumulated_spaces(&prv_spaces, &dst);
215 					}
216 				}
217 				/* Not a potential space/tab replacement */
218 				else
219 				{
220 					/* allow leading stars in comments */
221 					if (in_leading_whitespace && *src != ' ' && *src != '\t' &&
222 						(!in_comment || *src != '*'))
223 						in_leading_whitespace = FALSE;
224 					was_period = (*src == '.');
225 					/* output accumulated spaces */
226 					output_accumulated_spaces(&prv_spaces, &dst);
227 					/* This can only happen in a quote. */
228 					if (*src == '\t')
229 						col_in_tab = 0;
230 					/* visual backspace? */
231 					if (*src == '\b')
232 						col_in_tab -= 2;
233 					/* Do we process quotes? */
234 					if (escaped == FALSE && protect_quotes == TRUE)
235 					{
236 						if (*src == '\\')
237 							escaped = TRUE;
238 						/* Is this a quote character? */
239 						if (*src == '"' || *src == '\'')
240 						{
241 							/* toggle quote mode */
242 							if (quote_char == ' ')
243 								quote_char = *src;
244 							else if (*src == quote_char)
245 								quote_char = ' ';
246 						}
247 					}
248 					/* newlines/CRs do not terminate escapes */
249 					else if (*src != '\r' && *src != '\n')
250 						escaped = FALSE;
251 
252 					/* reached newline/CR;	clip line? */
253 					if ((*src == '\r' || *src == '\n') &&
254 						clip_lines == TRUE &&
255 						quote_char == ' ' &&
256 						escaped == FALSE)
257 						trim_trailing_whitespace(&prv_spaces, &dst, out_line);
258 					*(dst++) = *src;
259 				}
260 				col_in_tab %= tab_size;
261 				++src;
262 			}
263 			/* for cases where the last line of file has no newline */
264 			if (clip_lines == TRUE && escaped == FALSE)
265 				trim_trailing_whitespace(&prv_spaces, &dst, out_line);
266 			output_accumulated_spaces(&prv_spaces, &dst);
267 			*dst = NUL;
268 
269 			if (fputs(out_line, stdout) == EOF)
270 			{
271 				fprintf(stderr, "Cannot write to output file %s: %s\n", argv[0], strerror(errno));
272 				exit(1);
273 			}
274 		}
275 	} while (--argc > 0);
276 	return 0;
277 }
278