1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */ 2 3 #ifndef __EF_CHARSET_H__ 4 #define __EF_CHARSET_H__ 5 6 #include <pobl/bl_types.h> /* u_xxx */ 7 8 /* 9 * ISO2022 Ft should be within 0x40('@') and 0x7e('~') except 10 * DEC_SPECIAL(Ft='0'). 11 */ 12 13 /* 0x00 - 0x4e (Ft is within 0x30 and 0x7e) (0x30-0x3f is for DRCS) */ 14 #define CS94SB_ID(c) ((u_char)(c) - 0x30) 15 /* 0x50 - 0x9e (Ft is within 0x30 and 0x7e) (0x30-0x3f is for DRCS) */ 16 #define CS96SB_ID(c) ((u_char)(c) + 0x20) 17 /* 0xa0 - 0xbf (XXX Ft is within 0x40 and 0x5f) */ 18 #define CS94MB_ID(c) ((u_char)(c) + 0x60) 19 /* No 96^n cs exists. */ 20 #define CS96MB_ID(c) UNKNOWN_CS 21 /* 0xc0 - 0xcf (Ft is within 0x40 and 0x4f) */ 22 #define NON_ISO2022_1_ID(c) ((u_char)(c) + 0x80) 23 /* 0xd0 - 0xdf (Ft is within 0x40 and 0x4f) */ 24 #define NON_ISO2022_2_ID(c) ((u_char)(c) + 0x90) 25 26 /* 0x100 - 0x1bf (= 0x100 | CS9XXB_ID) */ 27 #define CS_REVISION_1(cs) ((cs) + 0x100) 28 /* 0x200 - 0x2bf (= 0x200 | CS9XXB_ID) */ 29 #define CS_REVISION_2(cs) ((cs) + 0x200) 30 31 /* 32 * 'and 0xff' should be done because 0x100 - region is used for 'or cs_revision' 33 */ 34 35 #define CS94SB_FT(cs) (((cs)&0xff) + 0x30) 36 #define CS96SB_FT(cs) (((cs)&0xff) - 0x20) 37 #define CS94MB_FT(cs) (((cs)&0xff) - 0x60) 38 #define CS96MB_FT(cs) ' ' /* dummy */ 39 40 #define IS_CS94SB(cs) ((unsigned int)((cs)&0xff) <= 0x4e) /* same as 0x00 <= .. <= 0x4e */ 41 #define IS_CS96SB(cs) (0x50 <= ((cs)&0xff) && ((cs)&0xff) <= 0x9e) 42 #define IS_CS94MB(cs) (0xa0 <= ((cs)&0xff) && ((cs)&0xff) <= 0xbf) 43 #define IS_CS96MB(cs) (0) /* always false */ 44 #define IS_CS_BASED_ON_ISO2022(cs) (0x0 <= ((cs)&0xff) && ((cs)&0xff) <= 0xbf) 45 /* without "(cs) != UNKNOWN_CS &&", 0xa0 <= (UNKNOWN_CS & 0xff) returns true. */ 46 #define IS_NON_ISO2022(cs) ((cs) != UNKNOWN_CS && 0xc0 <= ((cs)&0xff)) 47 #define IS_ISCII(cs) (0xf0 <= (cs) && (cs) <= 0xfa) 48 #define IS_JIS_EXT(cs) (JISC6226_1978_NEC_EXT <= (cs) && (cs) <= SJIS_IBM_EXT) 49 50 #define IS_FULLWIDTH_CS(cs) (IS_CS94MB(cs) || IS_CS96MB(cs) || (0x1e0 <= (cs) && (cs) <= 0x1ff)) 51 #define CS_SIZE(cs) \ 52 ((cs) == ISO10646_UCS4_1 ? 4 : ((IS_FULLWIDTH_CS(cs) || (cs) == ISO10646_UCS2_1) ? 2 : 1)) 53 #define IS_ISO10646_UCS4(cs) (((cs) & ~CS_REVISION_1(0)) == ISO10646_UCS4_1) 54 55 /* 56 * These enumeration numbers are based on iso2022 Ft(0x30-0x7e). 57 * Total range is -1 <-> 0x2ff(int16). 58 */ 59 typedef enum ef_charset { 60 UNKNOWN_CS = -1, 61 62 /* 94 sb cs */ 63 DEC_SPECIAL = CS94SB_ID('0'), 64 DEC_TECHNICAL = CS94SB_ID('>'), 65 ISO646_IRV = CS94SB_ID('@'), 66 ISO646_EN = CS94SB_ID('A'), 67 US_ASCII = CS94SB_ID('B'), 68 NATS_PRIMARY_FOR_FIN_SWEDEN = CS94SB_ID('C'), 69 NATS_PRIMARY_FOR_DEN_NOR = CS94SB_ID('E'), 70 ISO646_SWEDEN = CS94SB_ID('G'), 71 ISO646_SWEDEN_NAME = CS94SB_ID('H'), 72 JISX0201_KATA = CS94SB_ID('I'), 73 JISX0201_ROMAN = CS94SB_ID('J'), 74 75 /* 96 sb cs */ 76 ISO8859_1_R = CS96SB_ID('A'), 77 ISO8859_2_R = CS96SB_ID('B'), 78 ISO8859_3_R = CS96SB_ID('C'), 79 ISO8859_4_R = CS96SB_ID('D'), 80 ISO8859_7_R = CS96SB_ID('F'), 81 ISO8859_6_R = CS96SB_ID('G'), 82 ISO8859_8_R = CS96SB_ID('H'), 83 ISO8859_5_R = CS96SB_ID('L'), 84 ISO8859_9_R = CS96SB_ID('M'), 85 ISO8859_10_R = CS96SB_ID('V'), 86 TIS620_2533 = CS96SB_ID('T'), 87 ISO8859_13_R = CS96SB_ID('Y'), /* Ft = 5/9 */ 88 ISO8859_14_R = CS96SB_ID('_'), /* Ft = 5/15 */ 89 90 ISO8859_15_R = CS96SB_ID('b'), /* Ft = 6/2 */ 91 ISO8859_16_R = CS96SB_ID('f'), /* Ft = 6/6 */ 92 TCVN5712_3_1993 = CS96SB_ID('Z'), 93 94 /* 94 mb cs */ 95 JISC6226_1978 = CS94MB_ID('@'), 96 GB2312_80 = CS94MB_ID('A'), 97 JISX0208_1983 = CS94MB_ID('B'), 98 KSC5601_1987 = CS94MB_ID('C'), 99 JISX0212_1990 = CS94MB_ID('D'), 100 CNS11643_1992_1 = CS94MB_ID('G'), 101 CNS11643_1992_2 = CS94MB_ID('H'), 102 CNS11643_1992_3 = CS94MB_ID('I'), 103 CNS11643_1992_4 = CS94MB_ID('J'), 104 CNS11643_1992_5 = CS94MB_ID('K'), 105 CNS11643_1992_6 = CS94MB_ID('L'), 106 CNS11643_1992_7 = CS94MB_ID('M'), 107 JISX0213_2000_1 = CS94MB_ID('O'), 108 JISX0213_2000_2 = CS94MB_ID('P'), 109 110 /* 96 mb cs */ 111 /* Nothing */ 112 113 /* NOT ISO2022 class 1 (ESC 2/5 Ft) */ 114 UTF1 = NON_ISO2022_1_ID('B'), 115 UTF8 = NON_ISO2022_1_ID('G'), 116 117 /* NOT ISO2022 class 2 (ESC 2/5 2/15 Ft) */ 118 XCT_NON_ISO2022_CS_1 = NON_ISO2022_2_ID('1'), /* CTEXT */ 119 XCT_NON_ISO2022_CS_2 = NON_ISO2022_2_ID('2'), /* CTEXT */ 120 ISO10646_UCS2_1 = NON_ISO2022_2_ID('@'), /* Including US_ASCII(0x0-0x7f) */ 121 ISO10646_UCS4_1 = NON_ISO2022_2_ID('A'), /* Including US_ASCII(0x0-0x7f) */ 122 123 /* Followings are mef original classifications */ 124 125 /* 126 * Those who are not ISO2022 registed characterset or do not confirm to 127 * ISO2022. 128 * 0xe0 - 0xfa 129 */ 130 VISCII = 0xe0, /* Excluding US_ASCII(0x0-0x7f) */ 131 TCVN5712_1_1993 = 0xe1, /* ISO2022 compat */ 132 KOI8_R = 0xe2, /* Excluding US_ASCII(0x0-0x7f) */ 133 KOI8_U = 0xe3, /* Excluding US_ASCII(0x0-0x7f) */ 134 KOI8_T = 0xe4, /* Excluding US_ASCII(0x0-0x7f) */ 135 GEORGIAN_PS = 0xe5, /* Excluding US_ASCII(0x0-0x7f) */ 136 CP1250 = 0xe6, /* Excluding US_ASCII(0x0-0x7f) */ 137 CP1251 = 0xe7, /* Excluding US_ASCII(0x0-0x7f) */ 138 CP1252 = 0xe8, /* Excluding US_ASCII(0x0-0x7f) */ 139 CP1253 = 0xe9, /* Excluding US_ASCII(0x0-0x7f) */ 140 CP1254 = 0xea, /* Excluding US_ASCII(0x0-0x7f) */ 141 CP1255 = 0xeb, /* Excluding US_ASCII(0x0-0x7f) */ 142 CP1256 = 0xec, /* Excluding US_ASCII(0x0-0x7f) */ 143 CP1257 = 0xed, /* Excluding US_ASCII(0x0-0x7f) */ 144 CP1258 = 0xee, /* Excluding US_ASCII(0x0-0x7f) */ 145 CP874 = 0xef, /* Excluding US_ASCII(0x0-0x7f) */ 146 ISCII_ASSAMESE = 0xf0, /* Excluding US_ASCII(0x0-0x7f) */ 147 ISCII_BENGALI = 0xf1, /* Excluding US_ASCII(0x0-0x7f) */ 148 ISCII_GUJARATI = 0xf2, /* Excluding US_ASCII(0x0-0x7f) */ 149 ISCII_HINDI = 0xf3, /* Excluding US_ASCII(0x0-0x7f) */ 150 ISCII_KANNADA = 0xf4, /* Excluding US_ASCII(0x0-0x7f) */ 151 ISCII_MALAYALAM = 0xf5, /* Excluding US_ASCII(0x0-0x7f) */ 152 ISCII_ORIYA = 0xf6, /* Excluding US_ASCII(0x0-0x7f) */ 153 ISCII_PUNJABI = 0xf7, /* Excluding US_ASCII(0x0-0x7f) */ 154 ISCII_TAMIL = 0xf8, /* Excluding US_ASCII(0x0-0x7f) */ 155 ISCII_TELUGU = 0xf9, /* Excluding US_ASCII(0x0-0x7f) */ 156 #if 0 157 ISCII_ROMAN = 0xfa, /* Excluding US_ASCII(0x0-0x7f) */ 158 #endif 159 160 /* Followings are ISO2022 based charsets with revisions. */ 161 162 /* Revision 1 */ 163 JISX0208_1990 = CS_REVISION_1(JISX0208_1983), 164 ISO10646_UCS4_1_V = CS_REVISION_1(ISO10646_UCS4_1), /* mef original */ 165 166 /* Followings are mef original classifications */ 167 168 /* 169 * Those who are not ISO2022 registed characterset but confirm to ISO2022. 170 * (Bi-width) 171 * 0x1e0 - 0xf5 172 */ 173 JISC6226_1978_NEC_EXT = 0x1e0, 174 JISC6226_1978_NECIBM_EXT = 0x1e1, 175 JISX0208_1983_MAC_EXT = 0x1e2, 176 177 /* 178 * Those who are not ISO2022 registed characterset or do not confirm to 179 * ISO2022. 180 * (Bi-width) 181 * 0x1e3 - 0x1e9 182 */ 183 SJIS_IBM_EXT = 0x1e3, 184 UHC = 0x1e4, 185 BIG5 = 0x1e5, 186 CNS11643_1992_EUCTW_G2 = 0x1e6, 187 GBK = 0x1e7, 188 JOHAB = 0x1e8, 189 HKSCS = 0x1e9, 190 191 MAX_CHARSET = 0x2ff 192 193 } ef_charset_t; 194 195 #endif 196