1 /* -*- c-basic-offset:2; tab-width:2; indent-tabs-mode:nil -*- */
2 
3 #ifndef __EF_CHARSET_H__
4 #define __EF_CHARSET_H__
5 
6 #include <pobl/bl_types.h> /* u_xxx */
7 
8 /*
 * ISO2022 Ft should be between 0x40('@') and 0x7e('~'), except for
 * DEC_SPECIAL(Ft='0') and DEC_TECHNICAL(Ft='>').
11  */
12 
/*
 * Ft (final byte) -> charset id.
 *
 * The argument is narrowed to u_char and then widened to int, so every macro
 * yields an int and can be used as an enumerator initializer.
 *
 *   class            id range      accepted Ft
 *   94   single byte 0x00 - 0x4e   0x30 - 0x7e (0x30-0x3f is for DRCS)
 *   96   single byte 0x50 - 0x9e   0x30 - 0x7e (0x30-0x3f is for DRCS)
 *   94^n multi byte  0xa0 - 0xbf   0x40 - 0x5f (XXX)
 *   96^n multi byte  (none)        no 96^n cs exists
 *   non ISO2022 (1)  0xc0 - 0xcf   0x40 - 0x4f
 *   non ISO2022 (2)  0xd0 - 0xdf   0x40 - 0x4f
 */
#define CS94SB_ID(c) ((int)(u_char)(c) - 0x30)
#define CS96SB_ID(c) (0x20 + (int)(u_char)(c))
#define CS94MB_ID(c) (0x60 + (int)(u_char)(c))
/* The argument is ignored; there is no id to hand out. */
#define CS96MB_ID(c) (UNKNOWN_CS)
#define NON_ISO2022_1_ID(c) (0x80 + (int)(u_char)(c))
#define NON_ISO2022_2_ID(c) (0x90 + (int)(u_char)(c))
25 
/*
 * Revision tagging: the revision number is added on top of the 8-bit id.
 *   revision 1: 0x100 - 0x1bf (= 0x100 | CS9XXB_ID)
 *   revision 2: 0x200 - 0x2bf (= 0x200 | CS9XXB_ID)
 */
#define CS_REVISION_1(cs) (0x100 + (cs))
#define CS_REVISION_2(cs) (0x200 + (cs))
30 
/*
 * Charset id -> Ft (final byte).
 *
 * The id is masked with 0xff first, because the region from 0x100 upward is
 * used for 'or cs_revision'.
 */
#define CS94SB_FT(cs) (0x30 + ((cs) & 0xff))
#define CS96SB_FT(cs) (((cs) & 0xff) - 0x20)
#define CS94MB_FT(cs) (((cs) & 0xff) - 0x60)
/* Dummy value; the argument is ignored. */
#define CS96MB_FT(cs) (' ')
39 
/*
 * Charset class predicates.
 *
 * Where the id is masked with 0xff, the bits from 0x100 upward (set by
 * CS_REVISION_1/CS_REVISION_2) are ignored.
 *
 * Range checks are written as a single unsigned comparison,
 *   (unsigned)x - lo <= hi - lo      (same as lo <= x && x <= hi),
 * so that the macro argument is evaluated only once.  IS_NON_ISO2022 is the
 * exception: it still evaluates its argument twice.
 */
#define IS_CS94SB(cs) ((unsigned int)((cs)&0xff) <= 0x4e) /* same as 0x00 <= .. <= 0x4e */
#define IS_CS96SB(cs) ((unsigned int)((cs)&0xff) - 0x50u <= 0x9eu - 0x50u) /* 0x50 <= .. <= 0x9e */
#define IS_CS94MB(cs) ((unsigned int)((cs)&0xff) - 0xa0u <= 0xbfu - 0xa0u) /* 0xa0 <= .. <= 0xbf */
#define IS_CS96MB(cs) (0) /* always false */
/* The masked value is never negative, so only the upper bound has to be tested. */
#define IS_CS_BASED_ON_ISO2022(cs) ((unsigned int)((cs)&0xff) <= 0xbfu)
/* without "(cs) != UNKNOWN_CS &&", 0xc0 <= (UNKNOWN_CS & 0xff) returns true. */
#define IS_NON_ISO2022(cs) ((cs) != UNKNOWN_CS && 0xc0 <= ((cs)&0xff))
/* Not masked: only the unrevised ids 0xf0 - 0xfa match. */
#define IS_ISCII(cs) ((unsigned int)(cs) - 0xf0u <= 0xfau - 0xf0u)
/* Not masked: JISC6226_1978_NEC_EXT <= cs <= SJIS_IBM_EXT. */
#define IS_JIS_EXT(cs)                                         \
  ((unsigned int)(cs) - (unsigned int)JISC6226_1978_NEC_EXT <= \
   (unsigned int)(SJIS_IBM_EXT - JISC6226_1978_NEC_EXT))
49 
/* True if cs is a 94^n (or 96^n) multi-byte set, or lies within 0x1e0 - 0x1ff. */
#define IS_FULLWIDTH_CS(cs) (IS_CS94MB(cs) || IS_CS96MB(cs) || ((cs) >= 0x1e0 && (cs) <= 0x1ff))
/*
 * 4 for ISO10646_UCS4_1, 2 for a full-width cs or ISO10646_UCS2_1, otherwise 1
 * (presumably the number of bytes per character - confirm against callers).
 * NOTE(review): ISO10646_UCS4_1_V is not matched by the UCS4 test and yields 1;
 * confirm that this is intended.
 */
#define CS_SIZE(cs) \
  ((cs) != ISO10646_UCS4_1 ? ((IS_FULLWIDTH_CS(cs) || (cs) == ISO10646_UCS2_1) ? 2 : 1) : 4)
/* True for ISO10646_UCS4_1 with or without the revision 1 bit set. */
#define IS_ISO10646_UCS4(cs) (((cs) | CS_REVISION_1(0)) == CS_REVISION_1(ISO10646_UCS4_1))
54 
/*
 * Charset identifiers.
 *
 * The ISO2022 entries are derived from the ISO2022 final byte Ft (0x30-0x7e)
 * through the CS94SB_ID/CS96SB_ID/CS94MB_ID/NON_ISO2022_x_ID macros; the rest
 * are assigned literal values.
 * Total range is -1 <-> 0x2ff(int16).
 */
typedef enum ef_charset {
  /* Also the value CS96MB_ID() expands to. */
  UNKNOWN_CS = -1,

  /* 94 sb cs (0x00 - 0x4e) */
  DEC_SPECIAL = CS94SB_ID('0'),
  DEC_TECHNICAL = CS94SB_ID('>'),
  ISO646_IRV = CS94SB_ID('@'),
  ISO646_EN = CS94SB_ID('A'),
  US_ASCII = CS94SB_ID('B'),
  NATS_PRIMARY_FOR_FIN_SWEDEN = CS94SB_ID('C'),
  NATS_PRIMARY_FOR_DEN_NOR = CS94SB_ID('E'),
  ISO646_SWEDEN = CS94SB_ID('G'),
  ISO646_SWEDEN_NAME = CS94SB_ID('H'),
  JISX0201_KATA = CS94SB_ID('I'),
  JISX0201_ROMAN = CS94SB_ID('J'),

  /* 96 sb cs (0x50 - 0x9e) */
  ISO8859_1_R = CS96SB_ID('A'),
  ISO8859_2_R = CS96SB_ID('B'),
  ISO8859_3_R = CS96SB_ID('C'),
  ISO8859_4_R = CS96SB_ID('D'),
  ISO8859_7_R = CS96SB_ID('F'),
  ISO8859_6_R = CS96SB_ID('G'),
  ISO8859_8_R = CS96SB_ID('H'),
  ISO8859_5_R = CS96SB_ID('L'),
  ISO8859_9_R = CS96SB_ID('M'),
  ISO8859_10_R = CS96SB_ID('V'),
  TIS620_2533 = CS96SB_ID('T'),
  ISO8859_13_R = CS96SB_ID('Y'), /* Ft = 5/9 */
  ISO8859_14_R = CS96SB_ID('_'), /* Ft = 5/15 */

  ISO8859_15_R = CS96SB_ID('b'), /* Ft = 6/2 */
  ISO8859_16_R = CS96SB_ID('f'), /* Ft = 6/6 */
  TCVN5712_3_1993 = CS96SB_ID('Z'),

  /* 94 mb cs (0xa0 - 0xbf) */
  JISC6226_1978 = CS94MB_ID('@'),
  GB2312_80 = CS94MB_ID('A'),
  JISX0208_1983 = CS94MB_ID('B'),
  KSC5601_1987 = CS94MB_ID('C'),
  JISX0212_1990 = CS94MB_ID('D'),
  CNS11643_1992_1 = CS94MB_ID('G'),
  CNS11643_1992_2 = CS94MB_ID('H'),
  CNS11643_1992_3 = CS94MB_ID('I'),
  CNS11643_1992_4 = CS94MB_ID('J'),
  CNS11643_1992_5 = CS94MB_ID('K'),
  CNS11643_1992_6 = CS94MB_ID('L'),
  CNS11643_1992_7 = CS94MB_ID('M'),
  JISX0213_2000_1 = CS94MB_ID('O'),
  JISX0213_2000_2 = CS94MB_ID('P'),

  /* 96 mb cs */
  /* Nothing */

  /* NOT ISO2022 class 1 (ESC 2/5 Ft) */
  UTF1 = NON_ISO2022_1_ID('B'),
  UTF8 = NON_ISO2022_1_ID('G'),

  /*
   * NOT ISO2022 class 2 (ESC 2/5 2/15 Ft)
   *
   * NOTE(review): '1' and '2' are below 0x40, so with ASCII character
   * constants XCT_NON_ISO2022_CS_1 and XCT_NON_ISO2022_CS_2 evaluate to 0xc1
   * and 0xc2 rather than to values within 0xd0 - 0xdf, and
   * XCT_NON_ISO2022_CS_2 has the same value as UTF1 (0xc2). The two cannot be
   * told apart by value (e.g. as separate 'case' labels) - confirm that this
   * is intended.
   */
  XCT_NON_ISO2022_CS_1 = NON_ISO2022_2_ID('1'), /* CTEXT */
  XCT_NON_ISO2022_CS_2 = NON_ISO2022_2_ID('2'), /* CTEXT */
  ISO10646_UCS2_1 = NON_ISO2022_2_ID('@'),      /* Including US_ASCII(0x0-0x7f) */
  ISO10646_UCS4_1 = NON_ISO2022_2_ID('A'),      /* Including US_ASCII(0x0-0x7f) */

  /* The following are mef original classifications */

  /*
   * Character sets which are not registered in ISO2022 or do not conform to
   * ISO2022.
   * 0xe0 - 0xfa
   */
  VISCII = 0xe0,          /* Excluding US_ASCII(0x0-0x7f) */
  TCVN5712_1_1993 = 0xe1, /* ISO2022 compat */
  KOI8_R = 0xe2,          /* Excluding US_ASCII(0x0-0x7f) */
  KOI8_U = 0xe3,          /* Excluding US_ASCII(0x0-0x7f) */
  KOI8_T = 0xe4,          /* Excluding US_ASCII(0x0-0x7f) */
  GEORGIAN_PS = 0xe5,     /* Excluding US_ASCII(0x0-0x7f) */
  CP1250 = 0xe6,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1251 = 0xe7,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1252 = 0xe8,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1253 = 0xe9,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1254 = 0xea,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1255 = 0xeb,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1256 = 0xec,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1257 = 0xed,          /* Excluding US_ASCII(0x0-0x7f) */
  CP1258 = 0xee,          /* Excluding US_ASCII(0x0-0x7f) */
  CP874 = 0xef,           /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_ASSAMESE = 0xf0,  /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_BENGALI = 0xf1,   /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_GUJARATI = 0xf2,  /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_HINDI = 0xf3,     /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_KANNADA = 0xf4,   /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_MALAYALAM = 0xf5, /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_ORIYA = 0xf6,     /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_PUNJABI = 0xf7,   /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_TAMIL = 0xf8,     /* Excluding US_ASCII(0x0-0x7f) */
  ISCII_TELUGU = 0xf9,    /* Excluding US_ASCII(0x0-0x7f) */
#if 0
  ISCII_ROMAN = 0xfa, /* Excluding US_ASCII(0x0-0x7f) */
#endif

  /* The following are ISO2022 based charsets with revisions. */

  /* Revision 1 (base id + 0x100) */
  JISX0208_1990 = CS_REVISION_1(JISX0208_1983),
  ISO10646_UCS4_1_V = CS_REVISION_1(ISO10646_UCS4_1), /* mef original */

  /* The following are mef original classifications */

  /*
   * Character sets which are not registered in ISO2022 but conform to ISO2022.
   * (Bi-width)
   * 0x1e0 - 0x1e2
   */
  JISC6226_1978_NEC_EXT = 0x1e0,
  JISC6226_1978_NECIBM_EXT = 0x1e1,
  JISX0208_1983_MAC_EXT = 0x1e2,

  /*
   * Character sets which are not registered in ISO2022 or do not conform to
   * ISO2022.
   * (Bi-width)
   * 0x1e3 - 0x1e9
   */
  SJIS_IBM_EXT = 0x1e3,
  UHC = 0x1e4,
  BIG5 = 0x1e5,
  CNS11643_1992_EUCTW_G2 = 0x1e6,
  GBK = 0x1e7,
  JOHAB = 0x1e8,
  HKSCS = 0x1e9,

  /* Upper bound of the id range (see "Total range" above). */
  MAX_CHARSET = 0x2ff

} ef_charset_t;
194 
195 #endif
196