1 /* @(#)translit.c 1.19 21/08/20 Copyright 1985-2021 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)translit.c 1.19 21/08/20 Copyright 1985-2021 J. Schilling";
6 #endif
7
8 /*
9 * translit - translate characters
10 *
11 * translit fromset toset file1...filen
12 *
13 * Copyright 1985-2021 J. Schilling
14 */
15 /*
16 * The contents of this file are subject to the terms of the
17 * Common Development and Distribution License, Version 1.0 only
18 * (the "License"). You may not use this file except in compliance
19 * with the License.
20 *
21 * See the file CDDL.Schily.txt in this distribution for details.
22 * A copy of the CDDL is also available via the Internet at
23 * http://www.opensource.org/licenses/cddl1.txt
24 *
25 * When distributing Covered Code, include this CDDL HEADER in each
26 * file and include the License file CDDL.Schily.txt from this distribution.
27 */
28
29 #include <schily/stdio.h>
30 #include <schily/standard.h>
31 #include <schily/stdlib.h>
32 #include <schily/unistd.h> /* Include sys/types.h */
33 #include <schily/utypes.h>
34 #include <schily/string.h>
35 #define GT_COMERR /* #define comerr gtcomerr */
36 #define GT_ERROR /* #define error gterror */
37 #include <schily/schily.h>
38 #include <schily/nlsdefs.h>
39
40 #define TBUFSIZE 4096 /* Scratch buffer size for unescaped chars */
41 #define NUMCHARS 256 /* TYPE_MAXVAL(Uchar) + 1 */
42
43 LOCAL Uchar trchars[256]; /* Character translation table */
44 LOCAL Uchar delchars[256]; /* Chars to delete from output */
45 LOCAL Uchar sqchars[256]; /* Multchars to replace w. single char */
46 LOCAL BOOL cflag = FALSE;
47 LOCAL BOOL foldflag = FALSE;
48 LOCAL Uchar foldchar = '\0';
49 LOCAL BOOL is_translit;
50
51 LOCAL void usage __PR((int excode));
52 EXPORT int main __PR((int ac, char **av));
53 LOCAL void tr __PR((FILE *f));
54 LOCAL void buildtabs __PR((Uchar *fromset, Uchar *toset,
55 Uchar *sqset));
56 LOCAL int buildset __PR((Uchar *inp, Uchar *buf, int bsize,
57 char *tname, BOOL notflg));
58 LOCAL char unesc __PR((Uchar **cpp));
59 LOCAL int inset __PR((char c, Uchar *buf, int len));
60 LOCAL int etoolarge __PR((char *s));
61 LOCAL const char *filename __PR((const char *name));
62
63 LOCAL void
usage(excode)64 usage(excode)
65 int excode;
66 {
67 error("Usage: translit [options] fromset toset [file1...filen]\n");
68 error(" -help Print this help.\n");
69 error(" -version Print version number.\n");
70 error(" -c Complement the set of values specified in 'fromset'.\n");
71 error(" -d Delete all characters specified in 'fromset'.\n");
72 error(" -s Replace repeated characters by a single character.\n");
73 error("Standard in is used if no files are given.\n");
74 exit(excode);
75 }
76
77 EXPORT int
main(ac,av)78 main(ac, av)
79 int ac;
80 char *av[];
81 {
82 FILE *f;
83 char *opts = "help,version,c,d,s";
84 Uchar *fromset = NULL;
85 Uchar *toset = NULL;
86 Uchar *sqset = NULL;
87 BOOL help = FALSE;
88 BOOL prversion = FALSE;
89 BOOL delflg = FALSE;
90 BOOL sqflg = FALSE;
91 int cac;
92 char * const* cav;
93
94 save_args(ac, av);
95
96 (void) setlocale(LC_ALL, "");
97
98 #ifdef USE_NLS
99 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
100 #define TEXT_DOMAIN "translit" /* Use this only if it weren't */
101 #endif
102 { char *dir;
103 dir = searchfileinpath("share/locale", F_OK,
104 SIP_ANY_FILE|SIP_NO_PATH, NULL);
105 if (dir)
106 (void) bindtextdomain(TEXT_DOMAIN, dir);
107 else
108 #if defined(PROTOTYPES) && defined(INS_BASE)
109 (void) bindtextdomain(TEXT_DOMAIN, INS_BASE "/share/locale");
110 #else
111 (void) bindtextdomain(TEXT_DOMAIN, "/usr/share/locale");
112 #endif
113 (void) textdomain(TEXT_DOMAIN);
114 }
115 #endif /* USE_NLS */
116
117
118 is_translit = streql(filename(av[0]), "translit");
119 cac = --ac;
120 cav = ++av;
121 file_raise((FILE *)NULL, FALSE);
122
123 if (getallargs(&cac, &cav, opts, &help, &prversion,
124 &cflag, &delflg, &sqflg) < 0) {
125 errmsgno(EX_BAD, "Bad flag: %s.\n", cav[0]);
126 usage(EX_BAD);
127 }
128 if (help)
129 usage(0);
130 if (prversion) {
131 gtprintf(
132 "Translit release %s (%s-%s-%s) Copyright (C) 1985-2021 %s\n",
133 "1.19",
134 HOST_CPU, HOST_VENDOR, HOST_OS,
135 _("J�rg Schilling"));
136 exit(0);
137 }
138
139 cac = ac;
140 cav = av;
141 if (getfiles(&cac, &cav, opts) <= 0) {
142 errmsgno(EX_BAD, "No 'from' string given.\n");
143 usage(EX_BAD);
144 }
145 fromset = (Uchar *)cav[0];
146 cac--, cav++;
147
148 if (!(delflg ^ sqflg)) {
149 if (getfiles(&cac, &cav, opts) <= 0) {
150 errmsgno(EX_BAD, "No 'to' string given.\n");
151 usage(EX_BAD);
152 }
153 toset = (Uchar *)cav[0];
154 cac--, cav++;
155 if (sqflg)
156 sqset = (Uchar *)toset;
157 }
158 if (delflg)
159 toset = (Uchar *)"";
160 else if (sqflg)
161 sqset = (Uchar *)fromset;
162
163 buildtabs(fromset, toset, sqset);
164
165 if (getfiles(&cac, &cav, opts) > 0) {
166 for (; getfiles(&cac, &cav, opts) > 0; cac--, cav++) {
167 if (cav[0][0] == '-' && cav[0][1] == '\0') {
168 f = stdin;
169 } else {
170 f = fileopen(cav[0], "r");
171 if (f == NULL)
172 comerr("Cannot open '%s'.\n", cav[0]);
173 }
174 tr(f);
175 if (f != stdin)
176 (void) fclose(f);
177 }
178 } else {
179 tr(stdin);
180 }
181 return (0);
182 }
183
184 LOCAL void
tr(f)185 tr(f)
186 register FILE *f;
187 {
188 register int lastc = EOF;
189 register int c;
190 register int oc;
191
192 while ((c = getc(f)) >= 0) {
193 if (sqchars[c & 255]) {
194 oc = c;
195 if (oc != lastc)
196 (void) putchar(oc);
197 lastc = oc;
198 } else if (!delchars[c & 255]) {
199 oc = trchars[c & 255] & 255;
200
201 if (!foldflag || oc != lastc || oc != foldchar) {
202 (void) putchar(oc);
203 }
204 lastc = oc;
205 }
206 }
207 if (feof(f))
208 return;
209 if (ferror(f))
210 comerr("Read error on input.\n");
211 }
212
213 LOCAL void
buildtabs(fromset,toset,sqset)214 buildtabs(fromset, toset, sqset)
215 Uchar *fromset;
216 Uchar *toset;
217 Uchar *sqset;
218 {
219 Uchar frombuf[256];
220 Uchar tobuf[256];
221 Uchar sqbuf[256];
222 int fromcnt;
223 int tocnt;
224 int sqcnt;
225 register int i;
226
227 /*
228 * Initialize all tables.
229 */
230 for (i = 0; i < 256; i++) {
231 trchars[i] = (Uchar) i;
232 delchars[i] = FALSE;
233 sqchars[i] = FALSE;
234 }
235 fromcnt = buildset(fromset, frombuf, sizeof (frombuf), "from", cflag);
236 tocnt = buildset(toset, tobuf, sizeof (tobuf), "to", FALSE);
237 sqcnt = buildset(sqset, sqbuf, sizeof (sqbuf), "squeeze", FALSE);
238 if (tocnt > fromcnt) {
239 comerrno(EX_BAD, "'to' set larger than 'from' set.\n");
240 } else if (tocnt == 0) {
241 for (i = 0; i < fromcnt; i++)
242 delchars[frombuf[i & 255] & 255] = TRUE;
243 } else {
244 foldchar = tobuf[tocnt-1];
245 for (i = 0; i < fromcnt; i++) {
246 if (tocnt >= 0 && i >= tocnt) {
247 foldflag = TRUE;
248 trchars[frombuf[i & 255] & 255] = foldchar;
249 } else {
250 trchars[frombuf[i & 255] & 255] = tobuf[i];
251 }
252 }
253 }
254 for (i = 0; i < sqcnt; i++) {
255 sqchars[sqbuf[i & 255] & 255] = TRUE;
256 }
257 if (!is_translit)
258 foldflag = FALSE;
259 }
260
261 #define put(c, p, l, tn) ((void)(((l)-- <= 0) && etoolarge(tn)), \
262 *(p)++ = (c) & 255)
263 #define vput(c, p, l, tn) (void)put(c, p, l, tn)
264
265 LOCAL int
buildset(inp,buf,bsize,tname,notflg)266 buildset(inp, buf, bsize, tname, notflg)
267 Uchar *inp;
268 Uchar *buf;
269 int bsize;
270 char *tname;
271 BOOL notflg;
272 {
273 Uchar set[TBUFSIZE];
274 Uchar *setp = set;
275 int setsize = TBUFSIZE;
276 register int i;
277 register int to;
278
279 if (inp == NULL)
280 return (-1);
281 buf[0] = '\0';
282 set[0] = '\0';
283 if (is_translit && !notflg) {
284 if ((notflg = (*inp == '^')) != 0)
285 inp++;
286 }
287 for (; *inp != '\0'; inp++) {
288 switch (*inp) {
289
290 case '[': /* Start of character class */
291
292 if (inp[1] == '\0') { /* End of string */
293 vput(*inp, setp, setsize, tname);
294 break;
295 }
296
297 for (inp++; *inp != '\0'; inp++) {
298
299 if (*inp == ']' || *inp == '\0')
300 break;
301 else if (*inp == '\\' && inp[1] != '\0')
302 vput(unesc(&inp), setp, setsize, tname);
303 else
304 vput(*inp, setp, setsize, tname);
305
306 if (inp[1] == '-' &&
307 inp[2] != '\0' &&
308 inp[2] != ']') {
309 inp += 2;
310 i = setp[-1];
311 if (*inp == '\\' && inp[1] != '\0')
312 to = unesc(&inp);
313 else
314 to = *inp;
315 i &= 255;
316 to &= 255;
317 if (i > to) {
318 for (i--; i >= to; i--) {
319 vput(i, setp, setsize,
320 tname);
321 }
322 } else {
323 for (i++; i <= to; i++) {
324 vput(i, setp, setsize,
325 tname);
326 }
327 }
328 }
329 }
330 if (*inp != ']')
331 comerrno(EX_BAD, "Missing ']'.\n");
332 break;
333
334 case '\\':
335 if (inp[1] != '\0') {
336 vput(unesc(&inp), setp, setsize, tname);
337 break;
338 }
339 /* FALLTHROUGH */
340
341 default:
342 vput(*inp, setp, setsize, tname);
343 break;
344 }
345 }
346 setsize = TBUFSIZE - setsize; /* Convert remaining to content size */
347 if (notflg) {
348 int n = 0;
349
350 for (n = 0, i = 0; i < 256; i++) {
351 if (!inset(i, set, setsize)) {
352 n++;
353 vput(i, buf, bsize, tname);
354 }
355 }
356 setsize = n;
357 } else {
358 for (i = 0; i < setsize; i++)
359 vput(set[i], buf, bsize, tname);
360 }
361 return (setsize);
362 }
363
364 LOCAL char
unesc(cpp)365 unesc(cpp)
366 Uchar **cpp;
367 {
368 char c;
369 int result = 0;
370 int ndig = 0;
371 #define octal(c) (c >= '0' && c <= '7')
372
373 (*cpp)++; /* Skip '\\' */
374 switch (c = **cpp) {
375
376 case 'a':
377 return (ALERT);
378 case 'b':
379 return ('\b');
380 case 'f':
381 return ('\f');
382 case 'n':
383 return ('\n');
384 case 'r':
385 return ('\r');
386 case 't':
387 return ('\t');
388 case 'v':
389 return ('\v');
390 default:
391 if (octal(c)) {
392 for (; ndig < 3 && octal(c);
393 c = *(++(*cpp)), ndig++) {
394 result = result * 8 + c - '0';
395 }
396 (*cpp)--;
397 } else {
398 result = c;
399 }
400 return (result & 255);
401 }
402 }
403
404 #ifdef PROTOTYPES
405 LOCAL int
inset(char c,Uchar * buf,int len)406 inset(char c, Uchar *buf, int len)
407 #else
408 LOCAL int
409 inset(c, buf, len)
410 char c;
411 Uchar *buf;
412 int len;
413 #endif
414 {
415 while (len-- > 0)
416 if (c == *buf++)
417 return (TRUE);
418 return (FALSE);
419 }
420
421
422 LOCAL int
etoolarge(s)423 etoolarge(s)
424 char *s;
425 {
426 comerrno(EX_BAD, "'%s' set too large.\n", s);
427 /* NOTREACHED */
428 return (0);
429 }
430
431 LOCAL const char *
filename(name)432 filename(name)
433 const char *name;
434 {
435 char *p;
436
437 if ((p = strrchr(name, '/')) == NULL)
438 return (name);
439 return (++p);
440 }
441