1 /*******************************************************************************
2 language.c
3
4 libquicktime - A library for reading and writing quicktime/avi/mp4 files.
5 http://libquicktime.sourceforge.net
6
7 Copyright (C) 2002 Heroine Virtual Ltd.
8 Copyright (C) 2002-2011 Members of the libquicktime project.
9
10 This library is free software; you can redistribute it and/or modify it under
11 the terms of the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 2.1 of the License, or (at your option)
13 any later version.
14
15 This library is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 details.
19
20 You should have received a copy of the GNU Lesser General Public License along
21 with this library; if not, write to the Free Software Foundation, Inc., 51
22 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 *******************************************************************************/
24
25 #include "lqt_private.h"
26 #include <string.h>
27
28 #define LOG_DOMAIN "language"
29
/* Definitions for charsets. These map Macintosh script (charset) symbols
   to iconv charset names. Each macro expands to a { charset, fallback }
   initializer for the "cs" member of the language table below. A NULL
   entry means that no iconv name is known for that slot (replacements??) */

#define smRoman            { "MACINTOSH",    "ISO-8859-1" }
#define smHebrew           { NULL,           "ISO-8859-8" }
#define smJapanese         { NULL,           "SHIFT-JIS"  }
#define smArabic           { NULL,           "ISO-8859-6" }
#define smExtArabic        { NULL,           "ISO-8859-6" }
#define smGreek            { NULL,           NULL         }
#define smCentralEuroRoman { NULL,           "ISO-8859-2" }
#define smIcelandic        { "MAC-IS",       NULL         }
#define smTradChinese      { NULL,           NULL         }
#define smDevanagari       { NULL,           NULL         }
#define smThai             { NULL,           NULL         }
#define smKorean           { NULL,           NULL         }
#define smSami             { "MAC-SAMI",     NULL         }
#define smCyrillic         { "MAC-CYRILLIC", NULL         }
#define smSimpChinese      { NULL,           NULL         }
#define smCeltic           { NULL,           NULL         }
#define smRomanian         { NULL,           NULL         }
#define smUkrainian        { "MAC-UK",       NULL         }
#define smArmenian         { NULL,           NULL         }
#define smGeorgian         { NULL,           NULL         }
#define smMongolian        { NULL,           NULL         }
#define smTibetan          { NULL,           NULL         }
#define smBengali          { NULL,           NULL         }
#define smGujarati         { NULL,           NULL         }
#define smGurmukhi         { NULL,           NULL         }
#define smOriya            { NULL,           NULL         }
#define smMalayalam        { NULL,           NULL         }
#define smKannada          { NULL,           NULL         }
#define smTamil            { NULL,           NULL         }
#define smTelugu           { NULL,           NULL         }
#define smSinhalese        { NULL,           NULL         }
#define smBurmese          { NULL,           NULL         }
#define smKhmer            { NULL,           NULL         }
#define smLaotian          { NULL,           NULL         }
#define smVietnamese       { NULL,           NULL         }
#define smEthiopic         { NULL,           NULL         }

/* Language / character set codecs */

/* Read-only lookup table. Several languages appear twice with the same
   3 character code (e.g. "chi", "aze", "mon", "may"); a lookup by code
   that scans from the start therefore finds the first such entry. */

static const struct
  {
  int  mac_code;     // Integer mac code
  char language[4];  // 3 character language code (plus terminating NUL)

  struct
    {
    const char * charset;          // Character set (understood by iconv_open), may be NULL
    const char * charset_fallback; // Fallback character set (understood by iconv_open), may be NULL
    } cs;
  }
mac_languages[] =
  {
    {   0, "eng", smRoman },            // English
    {   1, "fra", smRoman },            // French
    {   2, "ger", smRoman },            // German
    {   3, "ita", smRoman },            // Italian
    {   4, "dut", smRoman },            // Dutch
    {   5, "swe", smRoman },            // Swedish
    {   6, "spa", smRoman },            // Spanish
    {   7, "dan", smRoman },            // Danish
    {   8, "por", smRoman },            // Portuguese
    {   9, "nor", smRoman },            // Norwegian
    {  10, "heb", smHebrew },           // Hebrew
    {  11, "jpn", smJapanese },         // Japanese
    {  12, "ara", smArabic },           // Arabic
    {  13, "fin", smRoman },            // Finnish
    {  14, "gre", smGreek },            // Greek
    {  15, "ice", smIcelandic },        // Icelandic
    {  16, "mlt", smRoman },            // Maltese
    {  17, "tur", smRoman },            // Turkish
    {  18, "scr", smRoman },            // Croatian
    {  19, "chi", smTradChinese },      // Traditional Chinese
    {  20, "urd", smArabic },           // Urdu
    {  21, "hin", smDevanagari },       // Hindi
    {  22, "tha", smThai },             // Thai
    {  23, "kor", smKorean },           // Korean
    {  24, "lit", smCentralEuroRoman }, // Lithuanian
    {  25, "pol", smCentralEuroRoman }, // Polish
    {  26, "hun", smCentralEuroRoman }, // Hungarian
    {  27, "est", smCentralEuroRoman }, // Estonian
    {  28, "lav", smCentralEuroRoman }, // Latvian
    {  29, "smi", smSami },             // Saamisk
    {  30, "fao", smIcelandic },        // Faeroese
    {  31, "far", smArabic },           // Farsi
    {  32, "rus", smCyrillic },         // Russian
    {  33, "chi", smSimpChinese },      // Simplified Chinese
    {  34, "dut", smRoman },            // Flemish
    {  35, "gle", smCeltic },           // Irish
    {  36, "alb", smRoman },            // Albanian
    {  37, "rum", smRomanian },         // Romanian
    {  38, "cze", smCentralEuroRoman }, // Czech
    {  39, "slo", smCentralEuroRoman }, // Slovak
    {  40, "slv", smCentralEuroRoman }, // Slovenian
    {  41, "yid", smHebrew },           // Yiddish
    {  42, "scc", smCyrillic },         // Serbian
    {  43, "mac", smCyrillic },         // Macedonian
    {  44, "bul", smCyrillic },         // Bulgarian
    {  45, "ukr", smUkrainian },        // Ukrainian
    {  46, "bel", smCyrillic },         // Byelorussian
    {  47, "uzb", smCyrillic },         // Uzbek
    {  48, "kaz", smCyrillic },         // Kazakh
    {  49, "aze", smCyrillic },         // Azerbaijani (cyrillic)
    {  50, "aze", smArabic },           // Azerbaijani (arabic)
    {  51, "arm", smArmenian },         // Armenian
    {  52, "geo", smGeorgian },         // Georgian
    {  53, "mol", smCyrillic },         // Moldavian
    {  54, "kir", smCyrillic },         // Kirghiz
    {  55, "tgk", smCyrillic },         // Tajiki
    {  56, "tuk", smCyrillic },         // Turkmen
    {  57, "mon", smMongolian },        // Mongolian
    {  58, "mon", smCyrillic },         // Mongolian (cyrillic)
    {  59, "pus", smArabic },           // Pashto
    {  60, "kur", smArabic },           // Kurdish
    {  61, "kas", smArabic },           // Kashmiri
    {  62, "snd", smExtArabic },        // Sindhi
    {  63, "tib", smTibetan },          // Tibetan
    {  64, "nep", smDevanagari },       // Nepali
    {  65, "san", smDevanagari },       // Sanskrit
    {  66, "mar", smDevanagari },       // Marathi
    {  67, "ben", smBengali },          // Bengali
    {  68, "asm", smBengali },          // Assamese
    {  69, "guj", smGujarati },         // Gujarati
    {  70, "pan", smGurmukhi },         // Punjabi
    {  71, "ori", smOriya },            // Oriya
    {  72, "mal", smMalayalam },        // Malayalam
    {  73, "kan", smKannada },          // Kannada
    {  74, "tam", smTamil },            // Tamil
    {  75, "tel", smTelugu },           // Telugu
    {  76, "sin", smSinhalese },        // Sinhalese
    {  77, "bur", smBurmese },          // Burmese
    {  78, "khm", smKhmer },            // Khmer
    {  79, "lao", smLaotian },          // Lao
    {  80, "vie", smVietnamese },       // Vietnamese
    {  81, "ind", smRoman },            // Indonesian
    {  82, "tgl", smRoman },            // Tagalog
    {  83, "may", smRoman },            // Malay (roman)
    {  84, "may", smArabic },           // Malay (arabic)
    {  85, "amh", smEthiopic },         // Amharic
    {  86, "tir", smEthiopic },         // Tigrinya
    {  87, "orm", smEthiopic },         // Oromo
    {  88, "som", smRoman },            // Somali
    {  89, "swa", smRoman },            // Swahili
    {  90, "kin", smRoman },            // Kinyarwanda
    {  91, "run", smRoman },            // Rundi
    {  92, "nya", smRoman },            // Chewa
    {  93, "mlg", smRoman },            // Malagasy
    {  94, "epo", smRoman },            // Esperanto
    { 128, "wel", smRoman },            // Welsh
    { 129, "baq", smRoman },            // Basque
    { 130, "cat", smRoman },            // Catalan
    { 131, "lat", smRoman },            // Latin
    { 132, "que", smRoman },            // Quechua
    { 133, "grn", smRoman },            // Guarani
    { 134, "aym", smRoman },            // Aymara
    { 135, "tat", smCyrillic },         // Tatar
    { 136, "uig", smArabic },           // Uighur
    { 137, "dzo", smTibetan },          // Dzongkha
    { 138, "jav", smRoman },            // Javanese (roman)
  };

/* Number of entries in mac_languages[] */
#define NUM_CODES (sizeof(mac_languages)/sizeof(mac_languages[0]))
195
/*
 * Read the language of a track as a 3 character code.
 *
 * trak:      track whose mdia.mdhd.language field is read
 * ret:       output buffer, must have room for at least 4 bytes
 * file_type: decides how the stored value is interpreted
 *
 * Returns 1 if a code was written to ret. Returns 0 if the stored value
 * is not found in mac_languages[]; ret is left untouched in that case.
 */
static int get_language(quicktime_trak_t * trak, char * ret,
                        lqt_file_type_t file_type)
  {
  int idx;
  int pos;

  if(!IS_MP4(file_type))
    {
    /* Stored value is a mac code: look it up in the table */
    for(idx = 0; idx < NUM_CODES; idx++)
      {
      if(mac_languages[idx].mac_code != trak->mdia.mdhd.language)
        continue;
      strcpy(ret, mac_languages[idx].language);
      return 1;
      }
    return 0;
    }

  /* MP4: three 5 bit fields, first letter in the highest bits,
     each holding (letter - 0x60) */
  for(pos = 0; pos < 3; pos++)
    ret[pos] = ((trak->mdia.mdhd.language >> (10 - 5 * pos)) & 0x1f) + 0x60;
  ret[3] = '\0';
  return 1;
  }
218
219 static char * unicode_string = LQT_UTF_8_16;
220
/*
 * Return the primary charset name for a language.
 *
 * mac_code:  integer mac language code, only looked at for non-MP4 files
 * file_type: for MP4 files unicode_string is returned unconditionally
 *
 * Returns the "charset" entry of the matching mac_languages[] row. The
 * result is NULL when that entry is NULL or when mac_code is not in the
 * table.
 */
const char * lqt_get_charset(int mac_code, lqt_file_type_t file_type)
  {
  const char * result = NULL;
  int n;

  if(IS_MP4(file_type))
    {
    result = unicode_string;
    }
  else
    {
    for(n = 0; n < NUM_CODES; n++)
      {
      if(mac_languages[n].mac_code == mac_code)
        {
        result = mac_languages[n].cs.charset;
        break;
        }
      }
    }
  return result;
  }
235
/*
 * Return the fallback charset name for a language.
 *
 * mac_code:  integer mac language code, only looked at for non-MP4 files
 * file_type: for MP4 files unicode_string is returned unconditionally
 *
 * Returns the "charset_fallback" entry of the matching mac_languages[]
 * row. The result is NULL when that entry is NULL or when mac_code is
 * not in the table.
 */
const char * lqt_get_charset_fallback(int mac_code, lqt_file_type_t file_type)
  {
  const char * result = NULL;
  int n;

  if(IS_MP4(file_type))
    {
    result = unicode_string;
    }
  else
    {
    for(n = 0; n < NUM_CODES; n++)
      {
      if(mac_languages[n].mac_code == mac_code)
        {
        result = mac_languages[n].cs.charset_fallback;
        break;
        }
      }
    }
  return result;
  }
250
251
/*
 * Store a language in the media header of a track.
 *
 * trak:      track whose mdia.mdhd.language field is written
 * language:  NUL terminated 3 character language code
 * file_type: decides how the code is stored
 *
 * For MP4 files the first three characters are packed into 15 bits,
 * 5 bits per character, each stored as (character - 0x60). Only the
 * lowercase letters 'a'..'z' fit this scheme; any other character
 * (including the terminator of a too short string) is rejected.
 *
 * For all other file types the code is looked up in mac_languages[] and
 * the mac code of the first matching entry is stored.
 *
 * Returns 1 if the language was stored, 0 if language is NULL, malformed
 * or unknown. The track is not modified when 0 is returned.
 */
static int set_language_code(quicktime_trak_t * trak,
                             const char * language, lqt_file_type_t file_type)
  {
  int i;

  if(!language)
    return 0;

  if(IS_MP4(file_type))
    {
    int packed = 0;

    for(i = 0; i < 3; i++)
      {
      /* Checked before use, so a short string stops at its terminator and
         is never read past it. Also keeps the value in 1..26, which
         avoids shifting a negative number. */
      if((language[i] < 'a') || (language[i] > 'z'))
        return 0;
      packed = (packed << 5) | (language[i] - 0x60);
      }
    trak->mdia.mdhd.language = packed;
    return 1;
    }

  for(i = 0; i < (int)NUM_CODES; i++)
    {
    if(!strcmp(language, mac_languages[i].language))
      {
      trak->mdia.mdhd.language = mac_languages[i].mac_code;
      return 1;
      }
    }
  return 0;
  }
276
/*
 * Set the language of an audio track.
 *
 * file:     file containing the track
 * track:    audio track index; values outside [0, total_atracks) are
 *           silently ignored
 * language: language code passed on to set_language_code()
 *
 * The result of set_language_code() is not reported to the caller.
 */
void lqt_set_audio_language(quicktime_t * file, int track, const char * language)
  {
  if((track >= 0) && (track < file->total_atracks))
    {
    set_language_code(file->atracks[track].track, language, file->file_type);
    }
  }
283
/*
 * Get the language of an audio track.
 *
 * file:     file containing the track
 * track:    audio track index
 * language: output buffer handed to get_language(), which writes up to
 *           4 bytes (3 characters plus terminator)
 *
 * Returns 0 if track is outside [0, total_atracks), otherwise the result
 * of get_language().
 */
int lqt_get_audio_language(quicktime_t * file, int track, char * language)
  {
  int in_range = (track >= 0) && (track < file->total_atracks);

  return in_range
    ? get_language(file->atracks[track].track, language, file->file_type)
    : 0;
  }
290
291
/*
 * Set the language of a text track.
 *
 * file:     file containing the track
 * track:    text track index; values outside [0, total_ttracks) are
 *           silently ignored
 * language: language code passed on to set_language_code()
 *
 * The result of set_language_code() is not reported to the caller.
 */
void lqt_set_text_language(quicktime_t * file, int track, const char * language)
  {
  if((track >= 0) && (track < file->total_ttracks))
    {
    set_language_code(file->ttracks[track].track, language, file->file_type);
    }
  }
299
/*
 * Get the language of a text track.
 *
 * file:     file containing the track
 * track:    text track index
 * language: output buffer handed to get_language(), which writes up to
 *           4 bytes (3 characters plus terminator)
 *
 * Returns 0 if track is outside [0, total_ttracks), otherwise the result
 * of get_language().
 */
int lqt_get_text_language(quicktime_t * file, int track, char * language)
  {
  int in_range = (track >= 0) && (track < file->total_ttracks);

  return in_range
    ? get_language(file->ttracks[track].track, language, file->file_type)
    : 0;
  }
306