1 /* @(#)translit.c	1.19 21/08/20 Copyright 1985-2021 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)translit.c	1.19 21/08/20 Copyright 1985-2021 J. Schilling";
6 #endif
7 
8 /*
9  *	translit - translate characters
10  *
11  *	translit fromset toset file1...filen
12  *
13  *	Copyright 1985-2021 J. Schilling
14  */
15 /*
16  * The contents of this file are subject to the terms of the
17  * Common Development and Distribution License, Version 1.0 only
18  * (the "License").  You may not use this file except in compliance
19  * with the License.
20  *
21  * See the file CDDL.Schily.txt in this distribution for details.
22  * A copy of the CDDL is also available via the Internet at
23  * http://www.opensource.org/licenses/cddl1.txt
24  *
25  * When distributing Covered Code, include this CDDL HEADER in each
26  * file and include the License file CDDL.Schily.txt from this distribution.
27  */
28 
29 #include <schily/stdio.h>
30 #include <schily/standard.h>
31 #include <schily/stdlib.h>
32 #include <schily/unistd.h>	/* Include sys/types.h */
33 #include <schily/utypes.h>
34 #include <schily/string.h>
35 #define	GT_COMERR		/* #define comerr gtcomerr */
36 #define	GT_ERROR		/* #define error gterror   */
37 #include <schily/schily.h>
38 #include <schily/nlsdefs.h>
39 
40 #define	TBUFSIZE	4096	/* Scratch buffer size for unescaped chars */
41 #define	NUMCHARS	256	/* TYPE_MAXVAL(Uchar) + 1		*/
42 
43 LOCAL	Uchar	trchars[256];	/* Character translation table		*/
44 LOCAL	Uchar	delchars[256];	/* Chars to delete from output		*/
45 LOCAL	Uchar	sqchars[256];	/* Multchars to replace w. single char  */
46 LOCAL	BOOL	cflag = FALSE;
47 LOCAL	BOOL	foldflag = FALSE;
48 LOCAL	Uchar	foldchar = '\0';
49 LOCAL	BOOL	is_translit;
50 
51 LOCAL	void	usage		__PR((int excode));
52 EXPORT	int	main		__PR((int ac, char **av));
53 LOCAL	void	tr		__PR((FILE *f));
54 LOCAL	void	buildtabs	__PR((Uchar *fromset, Uchar *toset,
55 							Uchar *sqset));
56 LOCAL	int	buildset	__PR((Uchar *inp, Uchar *buf, int bsize,
57 						char *tname, BOOL notflg));
58 LOCAL	char	unesc		__PR((Uchar **cpp));
59 LOCAL	int	inset		__PR((char c, Uchar *buf, int len));
60 LOCAL	int	etoolarge	__PR((char *s));
61 LOCAL	const char *filename	__PR((const char *name));
62 
63 LOCAL void
usage(excode)64 usage(excode)
65 	int	excode;
66 {
67 	error("Usage:	translit [options] fromset toset [file1...filen]\n");
68 	error("	-help	Print this help.\n");
69 	error("	-version Print version number.\n");
70 	error("	-c	Complement the set of values specified in 'fromset'.\n");
71 	error("	-d	Delete all characters specified in 'fromset'.\n");
72 	error("	-s	Replace repeated characters by a single character.\n");
73 	error("Standard in is used if no files are given.\n");
74 	exit(excode);
75 }
76 
77 EXPORT int
main(ac,av)78 main(ac, av)
79 	int	ac;
80 	char	*av[];
81 {
82 	FILE	*f;
83 	char	*opts = "help,version,c,d,s";
84 	Uchar	*fromset = NULL;
85 	Uchar	*toset = NULL;
86 	Uchar	*sqset = NULL;
87 	BOOL	help = FALSE;
88 	BOOL	prversion = FALSE;
89 	BOOL	delflg = FALSE;
90 	BOOL	sqflg = FALSE;
91 	int	cac;
92 	char * const* cav;
93 
94 	save_args(ac, av);
95 
96 	(void) setlocale(LC_ALL, "");
97 
98 #ifdef  USE_NLS
99 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
100 #define	TEXT_DOMAIN "translit"	/* Use this only if it weren't */
101 #endif
102 	{ char	*dir;
103 	dir = searchfileinpath("share/locale", F_OK,
104 					SIP_ANY_FILE|SIP_NO_PATH, NULL);
105 	if (dir)
106 		(void) bindtextdomain(TEXT_DOMAIN, dir);
107 	else
108 #if defined(PROTOTYPES) && defined(INS_BASE)
109 	(void) bindtextdomain(TEXT_DOMAIN, INS_BASE "/share/locale");
110 #else
111 	(void) bindtextdomain(TEXT_DOMAIN, "/usr/share/locale");
112 #endif
113 	(void) textdomain(TEXT_DOMAIN);
114 	}
115 #endif 	/* USE_NLS */
116 
117 
118 	is_translit = streql(filename(av[0]), "translit");
119 	cac = --ac;
120 	cav = ++av;
121 	file_raise((FILE *)NULL, FALSE);
122 
123 	if (getallargs(&cac, &cav, opts, &help, &prversion,
124 					&cflag, &delflg, &sqflg) < 0) {
125 		errmsgno(EX_BAD, "Bad flag: %s.\n", cav[0]);
126 		usage(EX_BAD);
127 	}
128 	if (help)
129 		usage(0);
130 	if (prversion) {
131 		gtprintf(
132 "Translit release %s (%s-%s-%s) Copyright (C) 1985-2021 %s\n",
133 				"1.19",
134 				HOST_CPU, HOST_VENDOR, HOST_OS,
135 				_("J�rg Schilling"));
136 		exit(0);
137 	}
138 
139 	cac = ac;
140 	cav = av;
141 	if (getfiles(&cac, &cav, opts) <= 0) {
142 		errmsgno(EX_BAD, "No 'from' string given.\n");
143 		usage(EX_BAD);
144 	}
145 	fromset = (Uchar *)cav[0];
146 	cac--, cav++;
147 
148 	if (!(delflg ^ sqflg)) {
149 		if (getfiles(&cac, &cav, opts) <= 0) {
150 			errmsgno(EX_BAD, "No 'to' string given.\n");
151 			usage(EX_BAD);
152 		}
153 		toset = (Uchar *)cav[0];
154 		cac--, cav++;
155 		if (sqflg)
156 			sqset = (Uchar *)toset;
157 	}
158 	if (delflg)
159 		toset = (Uchar *)"";
160 	else if (sqflg)
161 		sqset = (Uchar *)fromset;
162 
163 	buildtabs(fromset, toset, sqset);
164 
165 	if (getfiles(&cac, &cav, opts) > 0) {
166 		for (; getfiles(&cac, &cav, opts) > 0; cac--, cav++) {
167 			if (cav[0][0] == '-' && cav[0][1] == '\0') {
168 				f = stdin;
169 			} else {
170 				f = fileopen(cav[0], "r");
171 				if (f == NULL)
172 					comerr("Cannot open '%s'.\n", cav[0]);
173 			}
174 			tr(f);
175 			if (f != stdin)
176 				(void) fclose(f);
177 		}
178 	} else {
179 		tr(stdin);
180 	}
181 	return (0);
182 }
183 
184 LOCAL void
tr(f)185 tr(f)
186 	register FILE	*f;
187 {
188 	register int	lastc = EOF;
189 	register int	c;
190 	register int	oc;
191 
192 	while ((c = getc(f)) >= 0) {
193 		if (sqchars[c & 255]) {
194 			oc = c;
195 			if (oc != lastc)
196 				(void) putchar(oc);
197 			lastc = oc;
198 		} else if (!delchars[c & 255]) {
199 			oc = trchars[c & 255] & 255;
200 
201 			if (!foldflag || oc != lastc || oc != foldchar) {
202 				(void) putchar(oc);
203 			}
204 			lastc = oc;
205 		}
206 	}
207 	if (feof(f))
208 		return;
209 	if (ferror(f))
210 		comerr("Read error on input.\n");
211 }
212 
213 LOCAL void
buildtabs(fromset,toset,sqset)214 buildtabs(fromset, toset, sqset)
215 	Uchar 		*fromset;
216 	Uchar		*toset;
217 	Uchar		*sqset;
218 {
219 	Uchar		frombuf[256];
220 	Uchar		tobuf[256];
221 	Uchar		sqbuf[256];
222 	int		fromcnt;
223 	int		tocnt;
224 	int		sqcnt;
225 	register int	i;
226 
227 	/*
228 	 * Initialize all tables.
229 	 */
230 	for (i = 0; i < 256; i++) {
231 		trchars[i] = (Uchar) i;
232 		delchars[i] = FALSE;
233 		sqchars[i]  = FALSE;
234 	}
235 	fromcnt = buildset(fromset, frombuf, sizeof (frombuf), "from", cflag);
236 	tocnt = buildset(toset, tobuf, sizeof (tobuf), "to", FALSE);
237 	sqcnt = buildset(sqset, sqbuf, sizeof (sqbuf), "squeeze", FALSE);
238 	if (tocnt > fromcnt) {
239 		comerrno(EX_BAD, "'to' set larger than 'from' set.\n");
240 	} else if (tocnt == 0) {
241 		for (i = 0; i < fromcnt; i++)
242 			delchars[frombuf[i & 255] & 255] = TRUE;
243 	} else {
244 		foldchar = tobuf[tocnt-1];
245 		for (i = 0; i < fromcnt; i++) {
246 			if (tocnt >= 0 && i >= tocnt) {
247 				foldflag = TRUE;
248 				trchars[frombuf[i & 255] & 255] = foldchar;
249 			} else {
250 				trchars[frombuf[i & 255] & 255] = tobuf[i];
251 			}
252 		}
253 	}
254 	for (i = 0; i < sqcnt; i++) {
255 		sqchars[sqbuf[i & 255] & 255] = TRUE;
256 	}
257 	if (!is_translit)
258 		foldflag = FALSE;
259 }
260 
261 #define	put(c, p, l, tn)	((void)(((l)-- <= 0) && etoolarge(tn)), \
262 							*(p)++ = (c) & 255)
263 #define	vput(c, p, l, tn)	(void)put(c, p, l, tn)
264 
265 LOCAL int
buildset(inp,buf,bsize,tname,notflg)266 buildset(inp, buf, bsize, tname, notflg)
267 	Uchar	*inp;
268 	Uchar	*buf;
269 	int	bsize;
270 	char	*tname;
271 	BOOL	notflg;
272 {
273 	Uchar	set[TBUFSIZE];
274 	Uchar	*setp = set;
275 	int	setsize = TBUFSIZE;
276 register int	i;
277 register int	to;
278 
279 	if (inp == NULL)
280 		return (-1);
281 	buf[0] = '\0';
282 	set[0] = '\0';
283 	if (is_translit && !notflg) {
284 		if ((notflg = (*inp == '^')) != 0)
285 			inp++;
286 	}
287 	for (; *inp != '\0'; inp++) {
288 		switch (*inp) {
289 
290 		case '[':			/* Start of character class */
291 
292 			if (inp[1] == '\0') {		/* End of string */
293 				vput(*inp, setp, setsize, tname);
294 				break;
295 			}
296 
297 			for (inp++; *inp != '\0'; inp++) {
298 
299 				if (*inp == ']' || *inp == '\0')
300 					break;
301 				else if (*inp == '\\' && inp[1] != '\0')
302 					vput(unesc(&inp), setp, setsize, tname);
303 				else
304 					vput(*inp, setp, setsize, tname);
305 
306 				if (inp[1] == '-' &&
307 				    inp[2] != '\0' &&
308 				    inp[2] != ']') {
309 					inp += 2;
310 					i = setp[-1];
311 					if (*inp == '\\' && inp[1] != '\0')
312 						to = unesc(&inp);
313 					else
314 						to = *inp;
315 					i &= 255;
316 					to &= 255;
317 					if (i > to) {
318 						for (i--; i >= to; i--) {
319 							vput(i, setp, setsize,
320 									tname);
321 						}
322 					} else {
323 						for (i++; i <= to; i++) {
324 							vput(i, setp, setsize,
325 									tname);
326 						}
327 					}
328 				}
329 			}
330 			if (*inp != ']')
331 				comerrno(EX_BAD, "Missing ']'.\n");
332 			break;
333 
334 		case '\\':
335 			if (inp[1] != '\0') {
336 				vput(unesc(&inp), setp, setsize, tname);
337 				break;
338 			}
339 			/* FALLTHROUGH */
340 
341 		default:
342 			vput(*inp, setp, setsize, tname);
343 			break;
344 		}
345 	}
346 	setsize = TBUFSIZE - setsize;	/* Convert remaining to content size */
347 	if (notflg) {
348 		int	n = 0;
349 
350 		for (n = 0, i = 0; i < 256; i++) {
351 			if (!inset(i, set, setsize)) {
352 				n++;
353 				vput(i, buf, bsize, tname);
354 			}
355 		}
356 		setsize = n;
357 	} else {
358 		for (i = 0; i < setsize; i++)
359 			vput(set[i], buf, bsize, tname);
360 	}
361 	return (setsize);
362 }
363 
364 LOCAL char
unesc(cpp)365 unesc(cpp)
366 	Uchar	**cpp;
367 {
368 	char	c;
369 	int	result = 0;
370 	int	ndig = 0;
371 #define	octal(c)	(c >= '0' && c <= '7')
372 
373 	(*cpp)++;		/* Skip '\\' */
374 	switch (c = **cpp) {
375 
376 	case 'a':
377 		return (ALERT);
378 	case 'b':
379 		return ('\b');
380 	case 'f':
381 		return ('\f');
382 	case 'n':
383 		return ('\n');
384 	case 'r':
385 		return ('\r');
386 	case 't':
387 		return ('\t');
388 	case 'v':
389 		return ('\v');
390 	default:
391 		if (octal(c)) {
392 			for (; ndig < 3 && octal(c);
393 			    c = *(++(*cpp)), ndig++) {
394 				result = result * 8 + c - '0';
395 			}
396 			(*cpp)--;
397 		} else {
398 			result = c;
399 		}
400 		return (result & 255);
401 	}
402 }
403 
404 #ifdef	PROTOTYPES
405 LOCAL int
inset(char c,Uchar * buf,int len)406 inset(char c, Uchar *buf, int len)
407 #else
408 LOCAL int
409 inset(c, buf, len)
410 	char	c;
411 	Uchar	*buf;
412 	int	len;
413 #endif
414 {
415 	while (len-- > 0)
416 		if (c == *buf++)
417 			return (TRUE);
418 	return (FALSE);
419 }
420 
421 
422 LOCAL int
etoolarge(s)423 etoolarge(s)
424 	char	*s;
425 {
426 	comerrno(EX_BAD, "'%s' set too large.\n", s);
427 	/* NOTREACHED */
428 	return (0);
429 }
430 
431 LOCAL const char *
filename(name)432 filename(name)
433 	const char	*name;
434 {
435 	char	*p;
436 
437 	if ((p = strrchr(name, '/')) == NULL)
438 		return (name);
439 	return (++p);
440 }
441