1 /* 2 * internal.h - internal header stuff for the charset library. 3 */ 4 5 #ifndef charset_internal_h 6 #define charset_internal_h 7 8 /* This invariably comes in handy */ 9 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) 10 11 /* This is an invalid Unicode value used to indicate an error. */ 12 #define ERROR 0xFFFFL /* Unicode value representing error */ 13 14 typedef struct charset_spec charset_spec; 15 typedef struct sbcs_data sbcs_data; 16 17 struct charset_spec { 18 int charset; /* numeric identifier */ 19 20 /* 21 * A function to read the character set and output Unicode 22 * characters. The `emit' function expects to get Unicode chars 23 * passed to it; it should be sent ERROR for any encoding error 24 * on the input. 25 */ 26 void (*read)(charset_spec const *charset, long int input_chr, 27 charset_state *state, 28 void (*emit)(void *ctx, long int output), void *emitctx); 29 /* 30 * A function to read Unicode characters and output in this 31 * character set. The `emit' function expects to get byte 32 * values passed to it; it should be sent ERROR for any 33 * non-representable characters on the input. 34 */ 35 void (*write)(charset_spec const *charset, long int input_chr, 36 charset_state *state, 37 void (*emit)(void *ctx, long int output), void *emitctx); 38 void const *data; 39 }; 40 41 /* 42 * This is the format of `data' used by the SBCS read and write 43 * functions; so it's the format used in all SBCS definitions. 44 */ 45 struct sbcs_data { 46 /* 47 * This is a simple mapping table converting each SBCS position 48 * to a Unicode code point. Some positions may contain ERROR, 49 * indicating that that byte value is not defined in the SBCS 50 * in question and its occurrence in input is an error. 51 */ 52 unsigned long sbcs2ucs[256]; 53 54 /* 55 * This lookup table is used to convert Unicode back to the 56 * SBCS. It consists of the valid byte values in the SBCS, 57 * sorted in order of their Unicode translation. So given a 58 * Unicode value U, you can do a binary search on this table 59 * using the above table as a lookup: when testing the Xth 60 * position in this table, you branch according to whether 61 * sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or equal 62 * to U. 63 * 64 * Note that since there may be fewer than 256 valid byte 65 * values in a particular SBCS, we must supply the length of 66 * this table as well as the contents. 67 */ 68 unsigned char ucs2sbcs[256]; 69 int nvalid; 70 }; 71 72 /* 73 * Prototypes for internal library functions. 74 */ 75 charset_spec const *charset_find_spec(int charset); 76 void read_sbcs(charset_spec const *charset, long int input_chr, 77 charset_state *state, 78 void (*emit)(void *ctx, long int output), void *emitctx); 79 void write_sbcs(charset_spec const *charset, long int input_chr, 80 charset_state *state, 81 void (*emit)(void *ctx, long int output), void *emitctx); 82 83 /* 84 * Placate compiler warning about unused parameters, of which we 85 * expect to have some in this library. 86 */ 87 #define UNUSEDARG(x) ( (x) = (x) ) 88 89 #endif /* charset_internal_h */ 90