1 /*
2  * internal.h - internal header stuff for the charset library.
3  */
4 
5 #ifndef charset_internal_h
6 #define charset_internal_h
7 
/*
 * Number of elements in an array. Only meaningful when `x' is a real
 * array: applied to a pointer, sizeof sees the pointer itself rather
 * than whatever it points at.
 */
#define lenof(x) ( sizeof(x) / sizeof((x)[0]) )
10 
/*
 * In-band error marker. Wherever this library passes Unicode values
 * around, this value stands for "encoding error": 0xFFFF is not a
 * valid Unicode character, so it cannot be mistaken for real text.
 */
#define ERROR 0xFFFFL		       /* invalid Unicode value meaning "error" */
13 
/*
 * Boolean constants. Any definitions already in effect are dropped
 * first, so that inside this library FALSE is exactly 0 and TRUE is
 * exactly 1.
 */
#undef FALSE
#undef TRUE
#define FALSE 0
#define TRUE 1
18 
/*
 * Forward declarations, so that both structures can be referred to
 * without the `struct' keyword before their full definitions appear.
 */
typedef struct sbcs_data sbcs_data;
typedef struct charset_spec charset_spec;
21 
22 struct charset_spec {
23     int charset;		       /* numeric identifier */
24 
25     /*
26      * A function to read the character set and output Unicode
27      * characters. The `emit' function expects to get Unicode chars
28      * passed to it; it should be sent ERROR for any encoding error
29      * on the input.
30      */
31     void (*read)(charset_spec const *charset, long int input_chr,
32 		 charset_state *state,
33 		 void (*emit)(void *ctx, long int output), void *emitctx);
34     /*
35      * A function to read Unicode characters and output in this
36      * character set. The `emit' function expects to get byte
37      * values passed to it.
38      *
39      * A non-representable input character should cause a FALSE
40      * return, _before_ `emit' is called. Successful conversion
41      * causes a TRUE return.
42      *
43      * If `input_chr' is -1, this function must revert the encoding
44      * state to any default required at the end of a piece of
45      * encoded text.
46      */
47     int (*write)(charset_spec const *charset, long int input_chr,
48 		 charset_state *state,
49 		 void (*emit)(void *ctx, long int output), void *emitctx);
50     void const *data;
51 };
52 
/*
 * This is the format of `data' used by the SBCS read and write
 * functions; so it's the format used in all SBCS definitions.
 *
 * It holds two tables, one for each direction of conversion, plus
 * the number of entries in use in the second one.
 */
struct sbcs_data {
    /*
     * Forward table: maps each SBCS position (byte value) to a
     * Unicode code point. A position may hold ERROR instead, which
     * means that byte value is not defined in this SBCS and is an
     * error if it turns up in input.
     */
    unsigned long sbcs2ucs[256];

    /*
     * Reverse table, used to convert Unicode back to the SBCS. It
     * lists the valid byte values of the SBCS, sorted in order of
     * their Unicode translation. To look up a Unicode value U, do a
     * binary search over this table, using the forward table as
     * the sort key: when testing position X, branch according to
     * whether sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or
     * equal to U.
     *
     * An SBCS may have fewer than 256 valid byte values, so the
     * number of entries actually in use is stored separately, in
     * `nvalid' below.
     */
    unsigned char ucs2sbcs[256];
    /* Length of the ucs2sbcs table: the number of valid byte values. */
    int nvalid;
};
83 
84 /*
85  * Prototypes for internal library functions.
86  */
87 charset_spec const *charset_find_spec(int charset);
88 void read_sbcs(charset_spec const *charset, long int input_chr,
89 	       charset_state *state,
90 	       void (*emit)(void *ctx, long int output), void *emitctx);
91 int write_sbcs(charset_spec const *charset, long int input_chr,
92 	       charset_state *state,
93 	       void (*emit)(void *ctx, long int output), void *emitctx);
94 long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr);
95 long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr);
96 
97 void read_utf8(charset_spec const *charset, long int input_chr,
98 	       charset_state *state,
99 	       void (*emit)(void *ctx, long int output), void *emitctx);
100 int write_utf8(charset_spec const *charset, long int input_chr,
101 	       charset_state *state,
102 	       void (*emit)(void *ctx, long int output),
103 	       void *emitctx);
104 
/*
 * Lookup functions for the multi-byte character sets. Each set has
 * one function converting a position in the set to Unicode and one
 * converting Unicode back to a position, which is written through
 * the pointer arguments.
 *
 * NOTE(review): `r' and `c' presumably mean row and column, and `p'
 * (CNS 11643 only) the plane; the failure return values are not
 * visible from this header -- confirm against the implementations.
 */

/* Big5 */
long big5_to_unicode(int r, int c);
int unicode_to_big5(long unicode, int *r, int *c);

/* CNS 11643 */
long cns11643_to_unicode(int p, int r, int c);
int unicode_to_cns11643(long unicode, int *p, int *r, int *c);

/* CP949 */
long cp949_to_unicode(int r, int c);
int unicode_to_cp949(long unicode, int *r, int *c);

/* KS X 1001 */
long ksx1001_to_unicode(int r, int c);
int unicode_to_ksx1001(long unicode, int *r, int *c);

/* GB 2312 */
long gb2312_to_unicode(int r, int c);
int unicode_to_gb2312(long unicode, int *r, int *c);

/* JIS X 0208 */
long jisx0208_to_unicode(int r, int c);
int unicode_to_jisx0208(long unicode, int *r, int *c);

/* JIS X 0212 */
long jisx0212_to_unicode(int r, int c);
int unicode_to_jisx0212(long unicode, int *r, int *c);
119 
/*
 * Placate compiler warnings about unused parameters, of which we
 * expect to have some in this library.
 *
 * The argument is evaluated and its value thrown away by a cast to
 * void. Unlike a self-assignment, this also compiles when the
 * argument is const-qualified, never writes to the object, and does
 * not itself provoke a self-assignment warning.
 *
 * Use it as a statement: UNUSEDARG(param);
 */
#define UNUSEDARG(x) ( (void)(x) )
125 
126 #endif /* charset_internal_h */
127