// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//
// Author: dsites@google.com (Dick Sites)
//

#ifndef I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__
#define I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__

namespace CLD2 {

// Identifiers for text character encodings.
//
// Every enumerator carries an explicit value; the values run contiguously
// from 0 up to NUM_ENCODINGS - 1. NUM_ENCODINGS itself is a count, not an
// encoding. Several names are flagged below as misnamed or deprecated; they
// are kept under their existing names and values.
enum Encoding {
  ISO_8859_1           =  0,  // ASCII
  ISO_8859_2           =  1,  // Latin2
  ISO_8859_3           =  2,  //
  ISO_8859_4           =  3,  // Latin4
  ISO_8859_5           =  4,  // ISO-8859-5
  ISO_8859_6           =  5,  // Arabic
  ISO_8859_7           =  6,  // Greek
  ISO_8859_8           =  7,  // Hebrew
  ISO_8859_9           =  8,  //
  ISO_8859_10          =  9,  //
  JAPANESE_EUC_JP      = 10,  // EUC_JP
  JAPANESE_SHIFT_JIS   = 11,  // SJS
  JAPANESE_JIS         = 12,  // JIS
  CHINESE_BIG5         = 13,  // BIG5
  CHINESE_GB           = 14,  // GB
  CHINESE_EUC_CN       = 15,  // Misnamed. Should be EUC_TW. Was Basis Tech
                              // CNS11643EUC, before that EUC-CN(!)
  KOREAN_EUC_KR        = 16,  // KSC
  UNICODE_UNUSED       = 17,  // Unicode
  CHINESE_EUC_DEC      = 18,  // Misnamed. Should be EUC_TW. Was
                              // CNS11643EUC, before that EUC.
  CHINESE_CNS          = 19,  // Misnamed. Should be EUC_TW. Was
                              // CNS11643EUC, before that CNS.
  CHINESE_BIG5_CP950   = 20,  // BIG5_CP950
  JAPANESE_CP932       = 21,  // CP932
  UTF8                 = 22,
  UNKNOWN_ENCODING     = 23,
  ASCII_7BIT           = 24,  // ISO_8859_1 with all characters <= 127.
  RUSSIAN_KOI8_R       = 25,  // KOI8R
  RUSSIAN_CP1251       = 26,  // CP1251

  //----------------------------------------------------------
  MSFT_CP1252          = 27,  // 27: CP1252 aka MSFT euro ascii
  RUSSIAN_KOI8_RU      = 28,  // CP21866 aka KOI8-U, used for Ukrainian.
                              // Misnamed, this is _not_ KOI8-RU but KOI8-U.
                              // KOI8-U is used much more often than KOI8-RU.
  MSFT_CP1250          = 29,  // CP1250 aka MSFT eastern european
  ISO_8859_15          = 30,  // aka ISO_8859_0 aka ISO_8859_1 euroized
  //----------------------------------------------------------

  //----------------------------------------------------------
  MSFT_CP1254          = 31,  // used for Turkish
  MSFT_CP1257          = 32,  // used in Baltic countries
  //----------------------------------------------------------

  //----------------------------------------------------------
  //----------------------------------------------------------
  ISO_8859_11          = 33,  // aka TIS-620, used for Thai
  MSFT_CP874           = 34,  // used for Thai
  MSFT_CP1256          = 35,  // used for Arabic

  //----------------------------------------------------------
  MSFT_CP1255          = 36,  // Logical Hebrew Microsoft
  ISO_8859_8_I         = 37,  // Iso Hebrew Logical
  HEBREW_VISUAL        = 38,  // Iso Hebrew Visual
  //----------------------------------------------------------

  //----------------------------------------------------------
  CZECH_CP852          = 39,
  CZECH_CSN_369103     = 40,  // aka ISO_IR_139 aka KOI8_CS
  MSFT_CP1253          = 41,  // used for Greek
  RUSSIAN_CP866        = 42,
  //----------------------------------------------------------

  //----------------------------------------------------------
  // Handled by iconv in glibc
  ISO_8859_13          = 43,
  ISO_2022_KR          = 44,
  GBK                  = 45,
  GB18030              = 46,
  BIG5_HKSCS           = 47,
  ISO_2022_CN          = 48,

  //-----------------------------------------------------------
  // Following 4 encodings are deprecated (font encodings)
  TSCII                = 49,
  TAMIL_MONO           = 50,
  TAMIL_BI             = 51,
  JAGRAN               = 52,


  MACINTOSH_ROMAN      = 53,
  UTF7                 = 54,

  //-----------------------------------------------------------
  // Following 2 encodings are deprecated (font encodings)
  BHASKAR              = 55,  // Indic encoding - Devanagari
  HTCHANAKYA           = 56,  // 56 Indic encoding - Devanagari

  //-----------------------------------------------------------
  UTF16BE              = 57,  // big-endian UTF-16
  UTF16LE              = 58,  // little-endian UTF-16
  UTF32BE              = 59,  // big-endian UTF-32
  UTF32LE              = 60,  // little-endian UTF-32
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // An encoding that means "This is not text, but it may have some
  // simple ASCII text embedded". Intended input conversion
  // is to keep strings of >=4 seven-bit ASCII characters
  BINARYENC            = 61,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some Web pages allow a mixture of HZ-GB and GB-2312 by using
  // ~{ ... ~} for 2-byte pairs, and the browsers support this.
  HZ_GB_2312           = 62,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Some external vendors make the common input error of
  // converting MSFT_CP1252 to UTF8 *twice*.
  UTF8UTF8             = 63,
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Following 6 encodings are deprecated (font encodings)
  TAM_ELANGO           = 64,  // Elango - Tamil
  TAM_LTTMBARANI       = 65,  // Barani - Tamil
  TAM_SHREE            = 66,  // Shree - Tamil
  TAM_TBOOMIS          = 67,  // TBoomis - Tamil
  TAM_TMNEWS           = 68,  // TMNews - Tamil
  TAM_WEBTAMIL         = 69,  // Webtamil - Tamil
  //-----------------------------------------------------------

  //-----------------------------------------------------------
  // Shift_JIS variants used by Japanese cell phone carriers.
  KDDI_SHIFT_JIS       = 70,
  DOCOMO_SHIFT_JIS     = 71,
  SOFTBANK_SHIFT_JIS   = 72,
  // ISO-2022-JP variants used by KDDI and SoftBank.
  KDDI_ISO_2022_JP     = 73,
  SOFTBANK_ISO_2022_JP = 74,
  //-----------------------------------------------------------

  NUM_ENCODINGS        = 75,  // Always keep this at the end. It is not a
                              // valid Encoding enum, it is only used to
                              // indicate the total number of Encodings.
};

}  // End namespace CLD2

#endif  // I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__