1 /*
2 * csshow.c: display a character-set table on a terminal.
3 *
4 * Can display a selected block of Unicode, or the whole of any
5 * single-byte character set for which libcharset knows the
6 * translation.
7 *
8 * Intended mostly for quick-reference use - it might very well be
9 * quicker to type 'csshow U+0400' than to click around for ages in a
10 * browser finding the appropriate Unicode chart. But it also works
11 * well as a test of the specific font you've configured in your
12 * terminal window, of course.
13 *
14 * Possible extra features:
15 * - configurable row len and table size.
16 * - option to disambiguate the various classes of failure in the
17 * output, e.g. if terminfo gives us control sequences to change
18 * colours then we could colour the missing characters differently
19 * depending on why they're missing.
20 * + this mode probably implies that we must also display all
21 * characters in the range, whether printable or not, because
22 * the whole point might be to disambiguate the various causes
23 * of undisplayability. (In particular, don't forget to turn off
24 * the early exit when nothing in the range is printable at
25 * all.)
26 * - ability to display sub-blocks of multibyte encodings such as
27 * EUCs. But that would need some thought about how to sensibly
28 * index those tables.
29 */
30
31 #define _XOPEN_SOURCE 500 /* for wcwidth and snprintf */
32
33 #include <assert.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <locale.h>
38 #include <ctype.h>
39
40 #ifndef HAVE_NO_WCWIDTH
41 #include <wchar.h>
42 #endif
43
44 #ifndef HAVE_NO_WCTYPE
45 #include <wctype.h>
46 #endif
47
48 #include "charset.h"
49
/*
 * Usage summary written out by help(). Each string fragment below is
 * one complete line of the message, newline included.
 */
static const char *helptext =
    "usage: csshow ( CHARSET | BASE-UNICODE-VALUE )\n"
    " e.g.: csshow Win1252\n"
    " csshow U+2500\n"
    " also: csshow --help display this help text\n";
56
help(FILE * fp)57 static void help(FILE *fp)
58 {
59 fputs(helptext, fp);
60 }
61
/*
 * Classification assigned to every cell of the table.
 *
 * The ordering matters: code elsewhere in this file tests
 * 'type >= FIRST_PRINTABLE_VALUE' to decide whether a cell has
 * anything to display, so all the failure classes must sort below it
 * and all the displayable classes at or above it.
 */
enum Trans {
    /* Failure classes: nothing is printed for these. */
    BAD_CHAR_IN_SOURCE_CHARSET = 0,
    BAD_CHAR_IN_OUTPUT_CHARSET = 1,
    UNPRINTABLE_CHAR = 2,

    /* Displayable classes, distinguished by how many columns they use. */
    COMBINING_CHAR = 3,
    FIRST_PRINTABLE_VALUE = COMBINING_CHAR,
    WIDE_PRINTABLE_CHAR = 4,
    NORMAL_PRINTABLE_CHAR = 5
};
71 struct translated_char {
72 enum Trans type;
73 char buf[7]; /* maximum even theoretical UTF-8 code length, plus NUL */
74 };
75
main(int argc,char ** argv)76 int main(int argc, char **argv)
77 {
78 int doing_opts = 1;
79 int source_charset = CS_ASCII, output_charset = CS_NONE;
80 unsigned long base = 0, size = 0x100, rowlen = 0x10;
81
82 while (--argc > 0) {
83 const char *p = *++argv;
84 if (*p == '-' && doing_opts) {
85 if (!strcmp(p, "--")) {
86 doing_opts = 0;
87 } else if (!strcmp(p, "--help")) {
88 help(stdout);
89 return 0;
90 } else {
91 fprintf(stderr, "csshow: unrecognised option '%s'\n", p);
92 return 1;
93 }
94 } else {
95 int cs;
96
97 if (toupper((unsigned char)p[0]) == 'U' &&
98 (p[1] == '-' || p[1] == '+')) {
99 source_charset = CS_NONE; /* means just translate Unicode */
100 base = strtoul(p+2, NULL, 16);
101 } else if ((cs = charset_from_localenc(p)) != CS_NONE) {
102 if (!charset_is_single_byte(cs)) {
103 fprintf(stderr, "csshow: cannot display multibyte"
104 " charset %s\n", charset_to_localenc(cs));
105 return 1;
106 }
107 source_charset = cs;
108 base = 0;
109 } else {
110 fprintf(stderr, "csshow: unrecognised argument '%s'\n", p);
111 return 1;
112 }
113 }
114 }
115
116 #ifndef HAVE_NO_WCTYPE
117 setlocale(LC_CTYPE, "");
118 #endif
119
120 if (output_charset == CS_NONE)
121 output_charset = charset_from_locale();
122
123 {
124 struct translated_char *trans;
125 const char *rowheadfmt;
126 int rowheadwidth, colwidth;
127 int printed_a_line, skipped_a_line;
128 unsigned long i, j;
129
130 trans = malloc(size * sizeof(struct translated_char));
131 if (!trans) {
132 fprintf(stderr, "csshow: out of memory\n");
133 return 1;
134 }
135
136 /*
137 * Initial loop figuring out what characters we have in our
138 * block, and in what way each of them is weird.
139 */
140 for (i = 0; i < size; i++) {
141 unsigned long charcode = base + i;
142 wchar_t wc;
143
144 trans[i].buf[0] = '\0';
145
146 if (source_charset == CS_NONE) {
147 wc = charcode;
148 } else {
149 char c = charcode;
150 const char *cp = &c;
151 int clen = 1;
152 int error = 0;
153
154 int ret = charset_to_unicode(
155 &cp, &clen, &wc, 1, source_charset, NULL, L"", 0);
156 if (ret != 1) {
157 trans[i].type = BAD_CHAR_IN_SOURCE_CHARSET;
158 continue;
159 }
160 }
161
162 {
163 const wchar_t *wcp = &wc;
164 int wclen = 1;
165 int error = 0;
166
167 int ret = charset_from_unicode(
168 &wcp, &wclen, trans[i].buf, sizeof(trans[i].buf) - 1,
169 output_charset, NULL, &error);
170
171 assert(ret < sizeof(trans[i].buf));
172 trans[i].buf[ret] = '\0';
173
174 if (wclen != 0 || ret == 0 || error) {
175 trans[i].type = BAD_CHAR_IN_OUTPUT_CHARSET;
176 trans[i].buf[0] = '\0';
177 continue;
178 }
179 }
180
181 /*
182 * OK, we have a Unicode character and a corresponding
183 * UTF-8 sequence. But it might still be something we have
184 * to take care over printing.
185 */
186 #ifndef HAVE_NO_WCTYPE
187 if (!iswprint(wc)) {
188 trans[i].type = UNPRINTABLE_CHAR;
189 trans[i].buf[0] = '\0';
190 continue;
191 }
192 #endif
193 {
194 #ifndef HAVE_NO_WCWIDTH
195 int width = wcwidth(wc);
196 #else
197 int width = 1;
198 #endif
199
200 switch (width) {
201 case 0:
202 trans[i].type = COMBINING_CHAR;
203 break;
204 case 1:
205 trans[i].type = NORMAL_PRINTABLE_CHAR;
206 break;
207 case 2:
208 trans[i].type = WIDE_PRINTABLE_CHAR;
209 break;
210 default:
211 /* If we somehow had wcwidth but not wctype, weird
212 * returns from wcwidth give us a second way to
213 * identify non-printable control characters. */
214 trans[i].type = UNPRINTABLE_CHAR;
215 trans[i].buf[0] = '\0';
216 break;
217 }
218 }
219 }
220
221 /*
222 * Special case: if _nothing_ in our range is printable, we'll
223 * just terminate now.
224 */
225 for (i = 0; i < size; i++)
226 if (trans[i].type >= FIRST_PRINTABLE_VALUE)
227 break;
228 if (i == size) {
229 fprintf(stderr, "csshow: nothing printable at all in this"
230 " character range\n");
231 return 1;
232 }
233
234 /*
235 * Now we can figure out whether there are any wide
236 * characters, in which case we should space out our table a
237 * bit more. (We might also have to do that if rowlen is
238 * large.)
239 */
240 {
241 char testbuf[64];
242 colwidth = snprintf(testbuf, sizeof(testbuf),
243 "%-2x", (unsigned)(rowlen-1));
244 }
245 if (colwidth < 3) {
246 for (i = 0; i < size; i++)
247 if (trans[i].type == WIDE_PRINTABLE_CHAR)
248 colwidth = 3;
249 }
250
251 /*
252 * Work out the width of the heading column on the left.
253 */
254 if (source_charset == CS_NONE) {
255 rowheadfmt = "U+%-6.4X";
256 } else {
257 rowheadfmt = "%-4.2X";
258 }
259 {
260 char testbuf[64];
261 rowheadwidth = snprintf(testbuf, sizeof(testbuf),
262 rowheadfmt, (unsigned)(base + (size-1)));
263 }
264
265 /* Heading line. */
266 printf("%*s", rowheadwidth, "");
267 for (i = 0; i < rowlen; i++)
268 printf("%-*X", colwidth, (unsigned)i);
269 printf("\n");
270
271 printed_a_line = skipped_a_line = 0;
272
273 for (j = 0; j < size; j += rowlen) {
274 /* See if we're skipping this row completely. */
275 int skip = 1;
276 for (i = 0; i < rowlen && j+i < size; i++)
277 if (trans[j+i].type >= FIRST_PRINTABLE_VALUE)
278 skip = 0;
279 if (skip) {
280 skipped_a_line = 1;
281 continue;
282 }
283
284 /* We're printing this line, but we might need to print a
285 * blank line to indicate a previous skipped one. But we
286 * don't do that at the very start or end - we only
287 * indicate a skipped line between two that _were_
288 * printed. */
289 if (skipped_a_line && printed_a_line) {
290 printf("\n");
291 }
292 skipped_a_line = 0;
293
294 printed_a_line = 1;
295 printf(rowheadfmt, (unsigned)(base + j));;
296 for (i = 0; i < rowlen && j+i < size; i++) {
297 int chars_left = colwidth;
298 struct translated_char *chr = &trans[j+i];
299
300 switch (chr->type) {
301 case COMBINING_CHAR:
302 /* Print a space first, for the combining char to
303 * safely combine with. */
304 printf(" %s", chr->buf);
305 chars_left--;
306 break;
307 case WIDE_PRINTABLE_CHAR:
308 fputs(chr->buf, stdout);
309 chars_left -= 2;
310 break;
311 case NORMAL_PRINTABLE_CHAR:
312 fputs(chr->buf, stdout);
313 chars_left--;
314 break;
315 default:
316 /* Unprintable for one reason or another. */
317 break;
318 }
319
320 if (i+1 < rowlen && j+i+1 < size)
321 printf("%*s", chars_left, "");
322 }
323 printf("\n");
324 }
325 }
326
327 return 0;
328 }
329