1 /* 2 * internal.h - internal header stuff for the charset library. 3 */ 4 5 #ifndef charset_internal_h 6 #define charset_internal_h 7 8 /* This invariably comes in handy */ 9 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) 10 11 /* This is an invalid Unicode value used to indicate an error. */ 12 #define ERROR 0xFFFFL /* Unicode value representing error */ 13 14 #undef TRUE 15 #define TRUE 1 16 #undef FALSE 17 #define FALSE 0 18 19 typedef struct charset_spec charset_spec; 20 typedef struct sbcs_data sbcs_data; 21 22 struct charset_spec { 23 int charset; /* numeric identifier */ 24 25 /* 26 * A function to read the character set and output Unicode 27 * characters. The `emit' function expects to get Unicode chars 28 * passed to it; it should be sent ERROR for any encoding error 29 * on the input. 30 */ 31 void (*read)(charset_spec const *charset, long int input_chr, 32 charset_state *state, 33 void (*emit)(void *ctx, long int output), void *emitctx); 34 /* 35 * A function to read Unicode characters and output in this 36 * character set. The `emit' function expects to get byte 37 * values passed to it. 38 * 39 * A non-representable input character should cause a FALSE 40 * return, _before_ `emit' is called. Successful conversion 41 * causes a TRUE return. 42 * 43 * If `input_chr' is -1, this function must revert the encoding 44 * state to any default required at the end of a piece of 45 * encoded text. 46 */ 47 int (*write)(charset_spec const *charset, long int input_chr, 48 charset_state *state, 49 void (*emit)(void *ctx, long int output), void *emitctx); 50 void const *data; 51 }; 52 53 /* 54 * This is the format of `data' used by the SBCS read and write 55 * functions; so it's the format used in all SBCS definitions. 56 */ 57 struct sbcs_data { 58 /* 59 * This is a simple mapping table converting each SBCS position 60 * to a Unicode code point. Some positions may contain ERROR, 61 * indicating that that byte value is not defined in the SBCS 62 * in question and its occurrence in input is an error. 63 */ 64 unsigned long sbcs2ucs[256]; 65 66 /* 67 * This lookup table is used to convert Unicode back to the 68 * SBCS. It consists of the valid byte values in the SBCS, 69 * sorted in order of their Unicode translation. So given a 70 * Unicode value U, you can do a binary search on this table 71 * using the above table as a lookup: when testing the Xth 72 * position in this table, you branch according to whether 73 * sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or equal 74 * to U. 75 * 76 * Note that since there may be fewer than 256 valid byte 77 * values in a particular SBCS, we must supply the length of 78 * this table as well as the contents. 79 */ 80 unsigned char ucs2sbcs[256]; 81 int nvalid; 82 }; 83 84 /* 85 * Prototypes for internal library functions. 86 */ 87 charset_spec const *charset_find_spec(int charset); 88 void read_sbcs(charset_spec const *charset, long int input_chr, 89 charset_state *state, 90 void (*emit)(void *ctx, long int output), void *emitctx); 91 int write_sbcs(charset_spec const *charset, long int input_chr, 92 charset_state *state, 93 void (*emit)(void *ctx, long int output), void *emitctx); 94 long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr); 95 long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr); 96 97 void read_utf8(charset_spec const *charset, long int input_chr, 98 charset_state *state, 99 void (*emit)(void *ctx, long int output), void *emitctx); 100 int write_utf8(charset_spec const *charset, long int input_chr, 101 charset_state *state, 102 void (*emit)(void *ctx, long int output), 103 void *emitctx); 104 105 long int big5_to_unicode(int r, int c); 106 int unicode_to_big5(long int unicode, int *r, int *c); 107 long int cns11643_to_unicode(int p, int r, int c); 108 int unicode_to_cns11643(long int unicode, int *p, int *r, int *c); 109 long int cp949_to_unicode(int r, int c); 110 int unicode_to_cp949(long int unicode, int *r, int *c); 111 long int ksx1001_to_unicode(int r, int c); 112 int unicode_to_ksx1001(long int unicode, int *r, int *c); 113 long int gb2312_to_unicode(int r, int c); 114 int unicode_to_gb2312(long int unicode, int *r, int *c); 115 long int jisx0208_to_unicode(int r, int c); 116 int unicode_to_jisx0208(long int unicode, int *r, int *c); 117 long int jisx0212_to_unicode(int r, int c); 118 int unicode_to_jisx0212(long int unicode, int *r, int *c); 119 120 /* 121 * Placate compiler warning about unused parameters, of which we 122 * expect to have some in this library. 123 */ 124 #define UNUSEDARG(x) ( (x) = (x) ) 125 126 #endif /* charset_internal_h */ 127