xref: /dragonfly/usr.bin/xstr/xstr.c (revision 65cc0652)
1 /*
2  * Copyright (c) 1980, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#) Copyright (c) 1980, 1993 The Regents of the University of California.  All rights reserved.
30  * @(#)xstr.c	8.1 (Berkeley) 6/9/93
31  * $FreeBSD: src/usr.bin/xstr/xstr.c,v 1.11 2008/05/13 09:42:03 kevlo Exp $
32  */
33 
34 #include <sys/types.h>
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <signal.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "pathnames.h"
45 
46 /*
47  * xstr - extract and hash strings in a C program
48  *
49  * Bill Joy UCB
50  * November, 1978
51  */
52 
/* Explicitly discard the value of an expression. */
#define	ignore(a)	((void)(a))

/*
 * tellpt counts the calls made to xgetc(); inithash() uses it as the
 * offset at which the next entry of the strings file starts.
 */
static off_t	tellpt = 0;

/* Offset hashit() gives to the next string it has to add to the table. */
static off_t	mesgpt = 0;

/*
 * Path of the strings file.  It starts out as "strings"; main() replaces
 * it with a freshly created temporary file unless -c was given or there
 * is no input to process.
 */
static char	cstrings[] = "strings";
static char	*strings = cstrings;

static int	readstd = 0;	/* number of '-' options seen */
static int	cflg = 0;	/* number of -c options seen */
static int	vflg = 0;	/* number of -v options seen; enables found() */
64 
65 static char lastchr(char *);
66 
67 static int fgetNUL(char *, int, FILE *);
68 static int istail(char *, char *);
69 static int octdigit(char);
70 static int xgetc(FILE *);
71 
72 static off_t hashit(char *, int);
73 static off_t yankstr(char **);
74 
75 static void usage(void) __dead2;
76 
77 static void flushsh(void);
78 static void found(int, off_t, char *);
79 static void inithash(void);
80 static void onintr(int) __dead2;
81 static void process(const char *);
82 static void prstr(char *);
83 static void xsdotc(void);
84 
85 int
86 main(int argc, char *argv[])
87 {
88 	int c;
89 	int fdesc;
90 
91 	while ((c = getopt(argc, argv, "-cv")) != -1)
92 		switch (c) {
93 		case '-':
94 			readstd++;
95 			break;
96 		case 'c':
97 			cflg++;
98 			break;
99 		case 'v':
100 			vflg++;
101 			break;
102 		default:
103 			usage();
104 		}
105 	argc -= optind;
106 	argv += optind;
107 
108 	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
109 		signal(SIGINT, onintr);
110 	if (cflg || (argc == 0 && !readstd))
111 		inithash();
112 	else {
113 		strings = strdup(_PATH_TMP);
114 		if (strings == NULL)
115 			err(1, "strdup() failed");
116 		fdesc = mkstemp(strings);
117 		if (fdesc == -1)
118 			err(1, "Unable to create temporary file");
119 		close(fdesc);
120 	}
121 
122 	while (readstd || argc > 0) {
123 		if (freopen("x.c", "w", stdout) == NULL)
124 			err(1, "x.c");
125 		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
126 			err(2, "%s", argv[0]);
127 		process("x.c");
128 		if (readstd == 0) {
129 			argc--;
130 			argv++;
131 		}
132 		else {
133 			readstd = 0;
134 		}
135 	}
136 	flushsh();
137 	if (cflg == 0)
138 		xsdotc();
139 	if (strings[0] == '/')
140 		ignore(unlink(strings));
141 	exit(0);
142 }
143 
/* Print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
150 
/*
 * Line buffer for the input being rewritten.  process() fills it one line
 * at a time and yankstr() refills it when a literal continues on the next
 * line.
 */
static char linebuf[BUFSIZ];

/*
 * Copy stdin to stdout, replacing each string literal that starts outside
 * a comment with "(&xstr[N])", where N is the offset returned by
 * yankstr().  Lines beginning with '#' are copied without scanning, except
 * that "# <digit>..." is rewritten as "#line <digit>...".
 *
 * name labels the message printed when reading stdin fails.  A write error
 * on stdout is reported and the program exits through onintr().
 */
static void
process(const char *name)
{
	char *cp;
	char c;
	int incomm = 0;		/* non-zero while inside a comment */
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/*
			 * The cast keeps bytes >= 0x80 from reaching isdigit()
			 * as negative values, which would be undefined.
			 */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++);) switch (c) {

		case '"':
			if (incomm)
				goto def;
			/* yankstr() moves cp past the literal it consumed. */
			if ((ret = (int) yankstr(&cp)) == -1)
				goto out;
			printf("(&xstr[%d])", ret);
			break;

		case '\'':
			if (incomm)
				goto def;
			/*
			 * Copy the quote and the character after it so that
			 * '"' is not taken for the start of a string.
			 * NOTE(review): only one character is skipped, so a
			 * constant written as '\"' still reaches the '"' case.
			 */
			putchar(c);
			if (*cp)
				putchar(*cp++);
			break;

		case '/':
			if (incomm || *cp != '*')
				goto def;
			incomm = 1;
			cp++;
			printf("/*");
			continue;

		case '*':
			if (incomm && *cp == '/') {
				incomm = 0;
				cp++;
				printf("*/");
				continue;
			}
			goto def;

def:
		default:
			putchar(c);
			break;
		}
	}
out:
	if (ferror(stdout)) {
		warn("x.c");
		onintr(0);
	}

}
223 
224 static off_t
225 yankstr(char **cpp)
226 {
227 	char *cp = *cpp;
228 	char c, ch;
229 	char dbuf[BUFSIZ];
230 	char *dp = dbuf;
231 	char *tp;
232 	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
233 
234 	while ((c = *cp++)) {
235 		if (dp == dbuf + sizeof(dbuf) - 3)
236 			errx(1, "message too long");
237 		switch (c) {
238 
239 		case '"':
240 			cp++;
241 			goto out;
242 
243 		case '\\':
244 			c = *cp++;
245 			if (c == 0)
246 				break;
247 			if (c == '\n') {
248 				if (fgets(linebuf, sizeof linebuf, stdin)
249 				    == NULL) {
250 					if (ferror(stdin))
251 						err(3, "x.c");
252 					return(-1);
253 				}
254 				cp = linebuf;
255 				continue;
256 			}
257 			for (tp = tmp; (ch = *tp++); tp++)
258 				if (c == ch) {
259 					c = *tp;
260 					goto gotc;
261 				}
262 			if (!octdigit(c)) {
263 				*dp++ = '\\';
264 				break;
265 			}
266 			c -= '0';
267 			if (!octdigit(*cp))
268 				break;
269 			c <<= 3;
270 			c += *cp - '0';
271 			++cp;
272 			if (!octdigit(*cp))
273 				break;
274 			c <<= 3;
275 			c += *cp - '0';
276 			++cp;
277 			break;
278 		}
279 gotc:
280 		*dp++ = c;
281 	}
282 out:
283 	*cpp = --cp;
284 	*dp = 0;
285 	return (hashit(dbuf, 1));
286 }
287 
/*
 * Return 1 when c is one of the characters '0' through '7', 0 otherwise.
 * A plain range test is used so that a negative char value is never
 * passed to a <ctype.h> function.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
293 
294 static void
295 inithash(void)
296 {
297 	char buf[BUFSIZ];
298 	FILE *mesgread = fopen(strings, "r");
299 
300 	if (mesgread == NULL)
301 		return;
302 	for (;;) {
303 		mesgpt = tellpt;
304 		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
305 			break;
306 		ignore(hashit(buf, 0));
307 	}
308 	ignore(fclose(mesgread));
309 }
310 
/*
 * Read bytes from file into obuf until a NUL byte, end of file, or until
 * rmdr - 1 bytes have been stored, and terminate obuf with a NUL.
 * Returns 0 when the stream is at end of file or in error afterwards,
 * 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;
	return (!(feof(file) || ferror(file)));
}
322 
323 static int
324 xgetc(FILE *file)
325 {
326 
327 	tellpt++;
328 	return (getc(file));
329 }
330 
/* Number of hash chains; hashit() picks one with the mask 0177. */
#define	BUCKETS	128

/* One known string and its position in the strings file. */
struct hash {
	off_t	hpt;		/* offset of hstr in the strings file */
	char	*hstr;		/* private copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* non-zero: must be written by flushsh() */
};

/* Chain heads; only the hnext member of a head is used. */
static struct hash bucket[BUCKETS];
339 
340 static off_t
341 hashit(char *str, int new)
342 {
343 	int i;
344 	struct hash *hp, *hp0;
345 
346 	hp = hp0 = &bucket[lastchr(str) & 0177];
347 	while (hp->hnext) {
348 		hp = hp->hnext;
349 		i = istail(str, hp->hstr);
350 		if (i >= 0)
351 			return (hp->hpt + i);
352 	}
353 	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
354 		errx(8, "calloc");
355 	hp->hpt = mesgpt;
356 	if (!(hp->hstr = strdup(str)))
357 		err(1, NULL);
358 	mesgpt += strlen(hp->hstr) + 1;
359 	hp->hnext = hp0->hnext;
360 	hp->hnew = new;
361 	hp0->hnext = hp;
362 	return (hp->hpt);
363 }
364 
365 static void
366 flushsh(void)
367 {
368 	int i;
369 	struct hash *hp;
370 	FILE *mesgwrit;
371 	int old = 0, new = 0;
372 
373 	for (i = 0; i < BUCKETS; i++)
374 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
375 			if (hp->hnew)
376 				new++;
377 			else
378 				old++;
379 	if (new == 0 && old != 0)
380 		return;
381 	mesgwrit = fopen(strings, old ? "r+" : "w");
382 	if (mesgwrit == NULL)
383 		err(4, "%s", strings);
384 	for (i = 0; i < BUCKETS; i++)
385 		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
386 			found(hp->hnew, hp->hpt, hp->hstr);
387 			if (hp->hnew) {
388 				fseek(mesgwrit, hp->hpt, 0);
389 				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
390 				if (ferror(mesgwrit))
391 					err(4, "%s", strings);
392 			}
393 		}
394 	if (fclose(mesgwrit) == EOF)
395 		err(4, "%s", strings);
396 }
397 
398 static void
399 found(int new, off_t off, char *str)
400 {
401 	if (vflg == 0)
402 		return;
403 	if (!new)
404 		fprintf(stderr, "found at %d:", (int) off);
405 	else
406 		fprintf(stderr, "new at %d:", (int) off);
407 	prstr(str);
408 	fprintf(stderr, "\n");
409 }
410 
/*
 * Print the string cp on stderr in a visible form: bytes below space as
 * '^' plus a letter, 0177 as "^?", bytes with the high bit set as a
 * three-digit octal escape, and everything else unchanged.
 */
static void
prstr(char *cp)
{
	int c;

	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", c);
		else
			fprintf(stderr, "%c", c);
	}
}
426 
427 static void
428 xsdotc(void)
429 {
430 	FILE *strf = fopen(strings, "r");
431 	FILE *xdotcf;
432 
433 	if (strf == NULL)
434 		err(5, "%s", strings);
435 	xdotcf = fopen("xs.c", "w");
436 	if (xdotcf == NULL)
437 		err(6, "xs.c");
438 	fprintf(xdotcf, "char\txstr[] = {\n");
439 	for (;;) {
440 		int i, c;
441 
442 		for (i = 0; i < 8; i++) {
443 			c = getc(strf);
444 			if (ferror(strf)) {
445 				warn("%s", strings);
446 				onintr(0);
447 			}
448 			if (feof(strf)) {
449 				fprintf(xdotcf, "\n");
450 				goto out;
451 			}
452 			fprintf(xdotcf, "0x%02x,", c);
453 		}
454 		fprintf(xdotcf, "\n");
455 	}
456 out:
457 	fprintf(xdotcf, "};\n");
458 	ignore(fclose(xdotcf));
459 	ignore(fclose(strf));
460 }
461 
/* Return the last character of cp, or NUL when cp is the empty string. */
static char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return (*cp);
	return (cp[len - 1]);
}
470 
/*
 * If str is a suffix of of, return the index in of where it starts;
 * otherwise return -1.  The lengths are compared before subtracting so
 * the result never depends on narrowing a wrapped unsigned difference.
 */
static int
istail(char *str, char *of)
{
	size_t slen = strlen(str);
	size_t olen = strlen(of);

	if (slen > olen || strcmp(of + (olen - slen), str) != 0)
		return (-1);
	return ((int)(olen - slen));
}
480 
481 static void
482 onintr(int dummy __unused)
483 {
484 
485 	ignore(signal(SIGINT, SIG_IGN));
486 	if (strings[0] == '/')
487 		ignore(unlink(strings));
488 	ignore(unlink("x.c"));
489 	ignore(unlink("xs.c"));
490 	exit(7);
491 }
492