1 /* iana_charsets.c
2  *
3  * Routines and tables for IANA-registered character sets
4  *
5  *    http://www.iana.org/assignments/character-sets/character-sets.xhtml
6  *
7  * Wireshark - Network traffic analyzer
8  * By Gerald Combs <gerald@wireshark.org>
9  * Copyright 1998 Gerald Combs
10  *
11  * WAP dissector based on original work by Ben Fowler
12  * Updated by Neil Hunter <neil.hunter@energis-squared.com>
13  * WTLS support by Alexandre P. Ferreira (Splice IP)
14  *
15  * SPDX-License-Identifier: GPL-2.0-or-later
16  */
17 
18 #include "config.h"
19 
20 #include <glib.h>
21 
22 #include <epan/proto.h>
23 #include <epan/value_string.h>
24 
25 #include <epan/iana_charsets.h>
26 
27 /*
28  * Map a MIBenum code for a charset to a Wireshark string encoding.
29  */
30 guint
31 mibenum_charset_to_encoding (guint charset)
32 {
33     switch (charset) {
34         case 3:
35             return ENC_NA|ENC_ASCII;
36 
37         case 4:
38             return ENC_NA|ENC_ISO_8859_1;
39 
40         case 5:
41             return ENC_NA|ENC_ISO_8859_2;
42 
43         case 6:
44             return ENC_NA|ENC_ISO_8859_3;
45 
46         case 7:
47             return ENC_NA|ENC_ISO_8859_4;
48 
49         case 8:
50             return ENC_NA|ENC_ISO_8859_5;
51 
52         case 9:
53             return ENC_NA|ENC_ISO_8859_6;
54 
55         case 10:
56             return ENC_NA|ENC_ISO_8859_7;
57 
58         case 11:
59             return ENC_NA|ENC_ISO_8859_8;
60 
61         case 12:
62             return ENC_NA|ENC_ISO_8859_9;
63 
64         case 13:
65             return ENC_NA|ENC_ISO_8859_10;
66 
67         case 106:
68             return ENC_NA|ENC_UTF_8;
69 
70         case 109:
71             return ENC_NA|ENC_ISO_8859_13;
72 
73         case 110:
74             return ENC_NA|ENC_ISO_8859_14;
75 
76         case 111:
77             return ENC_NA|ENC_ISO_8859_15;
78 
79         case 112:
80             return ENC_NA|ENC_ISO_8859_16;
81 
82         case 1000:
83             /*
84              * The IANA page says:
85              *
86              *    this needs to specify network byte order: the
87              *    standard does not specify
88              *
89              * so presumably this means "big-endian UCS-2".
90              */
91             return ENC_BIG_ENDIAN|ENC_UCS_2;
92 
93         case 1001:
94             /*
95              * The IANA page says the same thing as for UCS-2.
96              */
97             return ENC_BIG_ENDIAN|ENC_UCS_4;
98 
99         case 1013:
100             return ENC_BIG_ENDIAN|ENC_UTF_16;
101 
102         case 1014:
103             return ENC_LITTLE_ENDIAN|ENC_UTF_16;
104 
105         case 1015:
106             /* XXX - UTF-16 with a BOM at the beginning */
107             return ENC_LITTLE_ENDIAN|ENC_UTF_16;
108 
109         case 2011:
110             return ENC_NA|ENC_CP437;
111 
112         case 2259:
113             return ENC_NA|ENC_ISO_8859_11;
114 
115         default:
116             return ENC_NA|ENC_ASCII;
117     }
118 }
119 
120 /* Updated from 10/04/2012 version */
121 static const value_string mibenum_vals_character_sets[] = {
122     {    0, "*" },
123     {    3, "US-ASCII" },
124     {    4, "ISO-8859-1" },
125     {    5, "ISO-8859-2" },
126     {    6, "ISO-8859-3" },
127     {    7, "ISO-8859-4" },
128     {    8, "ISO-8859-5" },
129     {    9, "ISO-8859-6" },
130     {   10, "ISO-8859-7" },
131     {   11, "ISO-8859-8" },
132     {   12, "ISO-8859-9" },
133     {   13, "ISO-8859-10" },
134     {   14, "ISO_6937-2-add" },
135     {   15, "JIS_X0201" },
136     {   16, "JIS_Encoding" },
137     {   17, "Shift_JIS" },
138     {   18, "EUC-JP" },
139     {   19, "Extended_UNIX_Code_Fixed_Width_for_Japanese" },
140     {   20, "BS_4730" },
141     {   21, "SEN_850200_C" },
142     {   22, "IT" },
143     {   23, "ES" },
144     {   24, "DIN_66003" },
145     {   25, "NS_4551-1" },
146     {   26, "NF_Z_62-010" },
147     {   27, "ISO-10646-UTF-1" },
148     {   28, "ISO_646.basic:1983" },
149     {   29, "INVARIANT" },
150     {   30, "ISO_646.irv:1983" },
151     {   31, "NATS-SEFI" },
152     {   32, "NATS-SEFI-ADD" },
153     {   33, "NATS-DANO" },
154     {   34, "NATS-DANO-ADD" },
155     {   35, "SEN_850200_B" },
156     {   36, "KS_C_5601-1987" },
157     {   37, "ISO-2022-KR" },
158     {   38, "EUC-KR" },
159     {   39, "ISO-2022-JP" },
160     {   40, "ISO-2022-JP-2" },
161     {   41, "JIS_C6220-1969-jp" },
162     {   42, "JIS_C6220-1969-ro" },
163     {   43, "PT" },
164     {   44, "greek7-old" },
165     {   45, "latin-greek" },
166     {   46, "NF_Z_62-010_(1973)" },
167     {   47, "Latin-greek-1" },
168     {   48, "ISO_5427" },
169     {   49, "JIS_C6226-1978" },
170     {   50, "BS_viewdata" },
171     {   51, "INIS" },
172     {   52, "INIS-8" },
173     {   53, "INIS-cyrillic" },
174     {   54, "ISO_5427:1981" },
175     {   55, "ISO_5428:1980" },
176     {   56, "GB_1988-80" },
177     {   57, "GB_2312-80" },
178     {   58, "NS_4551-2" },
179     {   59, "videotex-suppl" },
180     {   60, "PT2" },
181     {   61, "ES2" },
182     {   62, "MSZ_7795.3" },
183     {   63, "JIS_C6226-1983" },
184     {   64, "greek7" },
185     {   65, "ASMO_449" },
186     {   66, "iso-ir-90" },
187     {   67, "JIS_C6229-1984-a" },
188     {   68, "JIS_C6229-1984-b" },
189     {   69, "JIS_C6229-1984-b-add" },
190     {   70, "JIS_C6229-1984-hand" },
191     {   71, "JIS_C6229-1984-hand-add" },
192     {   72, "JIS_C6229-1984-kana" },
193     {   73, "ISO_2033-1983" },
194     {   74, "ANSI_X3.110-1983" },
195     {   75, "T.61-7bit" },
196     {   76, "T.61-8bit" },
197     {   77, "ECMA-cyrillic" },
198     {   78, "CSA_Z243.4-1985-1" },
199     {   79, "CSA_Z243.4-1985-2" },
200     {   80, "CSA_Z243.4-1985-gr" },
201     {   81, "ISO-8859-6-E" },
202     {   82, "ISO-8859-6-I" },
203     {   83, "T.101-G2" },
204     {   84, "ISO-8859-8-E" },
205     {   85, "ISO-8859-8-I" },
206     {   86, "CSN_369103" },
207     {   87, "JUS_I.B1.002" },
208     {   88, "IEC_P27-1" },
209     {   89, "JUS_I.B1.003-serb" },
210     {   90, "JUS_I.B1.003-mac" },
211     {   91, "greek-ccitt" },
212     {   92, "NC_NC00-10:81" },
213     {   93, "ISO_6937-2-25" },
214     {   94, "GOST_19768-74" },
215     {   95, "ISO_8859-supp" },
216     {   96, "ISO_10367-box" },
217     {   97, "latin-lap" },
218     {   98, "JIS_X0212-1990" },
219     {   99, "DS_2089" },
220     {  100, "us-dk" },
221     {  101, "dk-us" },
222     {  102, "KSC5636" },
223     {  103, "UNICODE-1-1-UTF-7" },
224     {  104, "ISO-2022-CN" },
225     {  105, "ISO-2022-CN-EXT" },
226     {  106, "UTF-8" },
227     {  109, "ISO-8859-13" },
228     {  110, "ISO-8859-14" },
229     {  111, "ISO-8859-15" },
230     {  112, "ISO-8859-16" },
231     {  113, "GBK" },
232     {  114, "GB18030" },
233     {  115, "OSD_EBCDIC_DF04_15" },
234     {  116, "OSD_EBCDIC_DF03_IRV" },
235     {  117, "OSD_EBCDIC_DF04_1" },
236     {  118, "ISO-11548-1" },
237     {  119, "KZ-1048" },
238 
239     { 1000, "ISO-10646-UCS-2" },
240     { 1001, "ISO-10646-UCS-4" },
241     { 1002, "ISO-10646-UCS-Basic" },
242     { 1003, "ISO-10646-Unicode-Latin1" },
243     { 1004, "ISO-10646-J-1" },
244     { 1005, "ISO-Unicode-IBM-1261" },
245     { 1006, "ISO-Unicode-IBM-1268" },
246     { 1007, "ISO-Unicode-IBM-1276" },
247     { 1008, "ISO-Unicode-IBM-1264" },
248     { 1009, "ISO-Unicode-IBM-1265" },
249     { 1010, "UNICODE-1-1" },
250     { 1011, "SCSU" },
251     { 1012, "UTF-7" },
252     { 1013, "UTF-16BE" },
253     { 1014, "UTF-16LE" },
254     { 1015, "UTF-16" },
255     { 1016, "CESU-8" },
256     { 1017, "UTF-32" },
257     { 1018, "UTF-32BE" },
258     { 1019, "UTF-32LE" },
259     { 1020, "BOCU-1" },
260 
261     { 2000, "ISO-8859-1-Windows-3.0-Latin-1" },
262     { 2001, "ISO-8859-1-Windows-3.1-Latin-1" },
263     { 2002, "ISO-8859-2-Windows-Latin-2" },
264     { 2003, "ISO-8859-9-Windows-Latin-5" },
265     { 2004, "hp-roman8" },
266     { 2005, "Adobe-Standard-Encoding" },
267     { 2006, "Ventura-US" },
268     { 2007, "Ventura-International" },
269     { 2008, "DEC-MCS" },
270     { 2009, "IBM850" },
271     { 2010, "IBM852" },
272     { 2011, "IBM437" },
273     { 2012, "PC8-Danish-Norwegian" },
274     { 2013, "IBM862" },
275     { 2014, "PC8-Turkish" },
276     { 2015, "IBM-Symbols" },
277     { 2016, "IBM-Thai" },
278     { 2017, "HP-Legal" },
279     { 2018, "HP-Pi-font" },
280     { 2019, "HP-Math8" },
281     { 2020, "Adobe-Symbol-Encoding" },
282     { 2021, "HP-DeskTop" },
283     { 2022, "Ventura-Math" },
284     { 2023, "Microsoft-Publishing" },
285     { 2024, "Windows-31J" },
286     { 2025, "GB2312" },
287     { 2026, "Big5" },
288     { 2027, "macintosh" },
289     { 2028, "IBM037" },
290     { 2029, "IBM038" },
291     { 2030, "IBM273" },
292     { 2031, "IBM274" },
293     { 2032, "IBM275" },
294     { 2033, "IBM277" },
295     { 2034, "IBM278" },
296     { 2035, "IBM280" },
297     { 2036, "IBM281" },
298     { 2037, "IBM284" },
299     { 2038, "IBM285" },
300     { 2039, "IBM290" },
301     { 2040, "IBM297" },
302     { 2041, "IBM420" },
303     { 2042, "IBM423" },
304     { 2043, "IBM424" },
305     { 2044, "IBM500" },
306     { 2045, "IBM851" },
307     { 2046, "IBM855" },
308     { 2047, "IBM857" },
309     { 2048, "IBM860" },
310     { 2049, "IBM861" },
311     { 2050, "IBM863" },
312     { 2051, "IBM864" },
313     { 2052, "IBM865" },
314     { 2053, "IBM868" },
315     { 2054, "IBM869" },
316     { 2055, "IBM870" },
317     { 2056, "IBM871" },
318     { 2057, "IBM880" },
319     { 2058, "IBM891" },
320     { 2059, "IBM903" },
321     { 2060, "IBM904" },
322     { 2061, "IBM905" },
323     { 2062, "IBM918" },
324     { 2063, "IBM1026" },
325     { 2064, "EBCDIC-AT-DE" },
326     { 2065, "EBCDIC-AT-DE-A" },
327     { 2066, "EBCDIC-CA-FR" },
328     { 2067, "EBCDIC-DK-NO" },
329     { 2068, "EBCDIC-DK-NO-A" },
330     { 2069, "EBCDIC-FI-SE" },
331     { 2070, "EBCDIC-FI-SE-A" },
332     { 2071, "EBCDIC-FR" },
333     { 2072, "EBCDIC-IT" },
334     { 2073, "EBCDIC-PT" },
335     { 2074, "EBCDIC-ES" },
336     { 2075, "EBCDIC-ES-A" },
337     { 2076, "EBCDIC-ES-S" },
338     { 2077, "EBCDIC-UK" },
339     { 2078, "EBCDIC-US" },
340     { 2079, "UNKNOWN-8BIT" },
341     { 2080, "MNEMONIC" },
342     { 2081, "MNEM" },
343     { 2082, "VISCII" },
344     { 2083, "VIQR" },
345     { 2084, "KOI8-R" },
346     { 2085, "HZ-GB-2312" },
347     { 2086, "IBM866" },
348     { 2087, "IBM775" },
349     { 2088, "KOI8-U" },
350     { 2089, "IBM00858" },
351     { 2090, "IBM00924" },
352     { 2091, "IBM01140" },
353     { 2092, "IBM01141" },
354     { 2093, "IBM01142" },
355     { 2094, "IBM01143" },
356     { 2095, "IBM01144" },
357     { 2096, "IBM01145" },
358     { 2097, "IBM01146" },
359     { 2098, "IBM01147" },
360     { 2099, "IBM01148" },
361     { 2100, "IBM01149" },
362     { 2101, "Big5-HKSCS" },
363     { 2102, "IBM1047" },
364     { 2103, "PTCP154" },
365     { 2104, "Amiga-1251" },
366     { 2105, "KOI7-switched" },
367     { 2106, "BRF" },
368     { 2107, "TSCII" },
369     { 2108, "CP51932" },
370     { 2109, "windows-874" },
371 
372     { 2250, "windows-1250" },
373     { 2251, "windows-1251" },
374     { 2252, "windows-1252" },
375     { 2253, "windows-1253" },
376     { 2254, "windows-1254" },
377     { 2255, "windows-1255" },
378     { 2256, "windows-1256" },
379     { 2257, "windows-1257" },
380     { 2258, "windows-1258" },
381     { 2259, "TIS-620" },
382     { 2260, "CP50220" },
383     { 0, NULL }
384 };
385 value_string_ext mibenum_vals_character_sets_ext = VALUE_STRING_EXT_INIT(mibenum_vals_character_sets);
386 
387 /*
388  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
389  *
390  * Local variables:
391  * c-basic-offset: 4
392  * tab-width: 8
393  * indent-tabs-mode: nil
394  * End:
395  *
396  * vi: set shiftwidth=4 tabstop=8 expandtab:
397  * :indentSize=4:tabSize=8:noTabs=true:
398  */
399