1 /*******************************************************************************
2  language.c
3 
4  libquicktime - A library for reading and writing quicktime/avi/mp4 files.
5  http://libquicktime.sourceforge.net
6 
7  Copyright (C) 2002 Heroine Virtual Ltd.
8  Copyright (C) 2002-2011 Members of the libquicktime project.
9 
10  This library is free software; you can redistribute it and/or modify it under
11  the terms of the GNU Lesser General Public License as published by the Free
12  Software Foundation; either version 2.1 of the License, or (at your option)
13  any later version.
14 
15  This library is distributed in the hope that it will be useful, but WITHOUT
16  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18  details.
19 
20  You should have received a copy of the GNU Lesser General Public License along
21  with this library; if not, write to the Free Software Foundation, Inc., 51
22  Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 *******************************************************************************/
24 
25 #include "lqt_private.h"
26 #include <string.h>
27 
28 #define LOG_DOMAIN "language"
29 
/* Definitions for charsets. These are mappings from Macintosh charset
   symbols to iconv charsets. Each macro expands to one brace-enclosed
   { charset, fallback charset } initializer (without a trailing comma,
   so it can be used as any element of an initializer list).
   Entries set to NULL are not available in iconv (replacements??) */

#define smRoman            { "MACINTOSH",    "ISO-8859-1"    }
#define smHebrew           { NULL,           "ISO-8859-8"    }
#define smJapanese         { NULL,           "SHIFT-JIS"     }
#define smArabic           { NULL,           "ISO-8859-6"    }
#define smExtArabic        { NULL,           "ISO-8859-6"    }
#define smGreek            { NULL,           NULL            }
#define smCentralEuroRoman { NULL,           "ISO-8859-2"    }
#define smIcelandic        { "MAC-IS",       NULL            }
#define smTradChinese      { NULL,           NULL            }
#define smDevanagari       { NULL,           NULL            }
#define smThai             { NULL,           NULL            }
#define smKorean           { NULL,           NULL            }
#define smSami             { "MAC-SAMI",     NULL            }
#define smCyrillic         { "MAC-CYRILLIC", NULL            }
#define smSimpChinese      { NULL,           NULL            }
#define smCeltic           { NULL,           NULL            }
#define smRomanian         { NULL,           NULL            }
#define smUkrainian        { "MAC-UK",       NULL            }
#define smArmenian         { NULL,           NULL            }
#define smGeorgian         { NULL,           NULL            }
#define smMongolian        { NULL,           NULL            }
#define smTibetan          { NULL,           NULL            }
#define smBengali          { NULL,           NULL            }
#define smGuriati          { NULL,           NULL            }
#define smGurmukhi         { NULL,           NULL            }
#define smOriya            { NULL,           NULL            }
#define smMalayalam        { NULL,           NULL            }
#define smKannada          { NULL,           NULL            }
#define smTamil            { NULL,           NULL            }
#define smTelugu           { NULL,           NULL            }
#define smSinhalese        { NULL,           NULL            }
#define smBurmese          { NULL,           NULL            }
#define smKhmer            { NULL,           NULL            }
#define smLaotian          { NULL,           NULL            }
#define smVietnamese       { NULL,           NULL            }
#define smEthiopic         { NULL,           NULL            }
70 
71 /* Language / character set codecs */
72 
73 static struct
74   {
75   int  mac_code;  // Integer mac code
76   char language[4];   // 3 character language code
77 
78   struct
79     {
80     char * charset; // Character set (understood by iconv_open)
81     char * charset_fallback; // Character set (understood by iconv_open)
82     } cs;
83   }
84 mac_languages[] =
85   {
86     {   0, "eng", smRoman }, // English
87     {   1, "fra", smRoman }, // French
88     {   2, "ger", smRoman }, // German
89     {   3, "ita", smRoman }, // Italian
90     {   4, "dut", smRoman }, // Dutch
91     {   5, "swe", smRoman }, // Swedish
92     {   6, "spa", smRoman }, // Spanish
93     {   7, "dan", smRoman }, // Danish
94     {   8, "por", smRoman }, // Portuguese
95     {   9, "nor", smRoman }, // Norwegian
96     {  10, "heb", smHebrew }, // Hebrew
97     {  11, "jpn", smJapanese }, // Japanese
98     {  12, "ara", smArabic }, // Arabic
99     {  13, "fin", smRoman }, // Finnish
100     {  14, "gre", smGreek }, // Greek
101     {  15, "ice", smIcelandic }, // Icelandic
102     {  16, "mlt", smRoman }, // Maltese
103     {  17, "tur", smRoman }, // Turkish
104     {  18, "scr", smRoman }, // Croatian
105     {  19, "chi", smTradChinese }, // Traditional Chinese
106     {  20, "urd", smArabic }, // Urdu
107     {  21, "hin", smDevanagari }, // Hindi
108     {  22, "tha", smThai }, // Thai
109     {  23, "kor", smKorean}, // Korean
110     {  24, "lit", smCentralEuroRoman }, // Lithuanian
111     {  25, "pol", smCentralEuroRoman }, // Polish
112     {  26, "hun", smCentralEuroRoman }, // Hungarian
113     {  27, "est", smCentralEuroRoman }, // Estonian
114     {  28, "lav", smCentralEuroRoman }, // Latvian
115     {  29, "smi", smSami }, // Saamisk
116     {  30, "fao", smIcelandic}, // Faeroese
117     {  31, "far", smArabic }, // Farsi
118     {  32, "rus", smCyrillic }, // Russian
119     {  33, "chi", smSimpChinese }, // Simplified Chinese
120     {  34, "dut", smRoman }, // Flemish
121     {  35, "gle", smCeltic }, // Irish
122     {  36, "alb", smRoman }, // Albanian
123     {  37, "rum", smRomanian }, // Romanian
124     {  38, "cze", smCentralEuroRoman }, // Czech
125     {  39, "slo", smCentralEuroRoman }, // Slovak
126     {  40, "slv", smCentralEuroRoman }, // Slovenian
127     {  41, "yid", smHebrew }, // Yiddish
128     {  42, "scc", smCyrillic }, // Serbian
129     {  43, "mac", smCyrillic }, // Macedonian
130     {  44, "bul", smCyrillic }, // Bulgarian
131     {  45, "ukr", smUkrainian }, // Ukrainian
132     {  46, "bel", smCyrillic }, // Byelorussian
133     {  47, "uzb", smCyrillic }, // Uzbek
134     {  48, "kaz", smCyrillic }, // Kazakh
135     {  49, "aze", smCyrillic }, // Azerbaijani (cyrillic)
136     {  50, "aze", smArabic }, // Azerbaijani (arabic)
137     {  51, "arm", smArmenian }, // Armenian
138     {  52, "geo", smGeorgian }, // Georgian
139     {  53, "mol", smCyrillic }, // Moldavian
140     {  54, "kir", smCyrillic }, // Kirghiz
141     {  55, "tgk", smCyrillic }, // Tajiki
142     {  56, "tuk", smCyrillic }, // Turkmen
143     {  57, "mon", smMongolian }, // Mongolian
144     {  58, "mon", smCyrillic }, // Mongolian (cyrillic)
145     {  59, "pus", smArabic }, // Pashto
146     {  60, "kur", smArabic }, // Kurdish
147     {  61, "kas", smArabic }, // Kashmiri
148     {  62, "snd", smExtArabic }, // Sindhi
149     {  63, "tib", smTibetan }, // Tibetan
150     {  64, "nep", smDevanagari }, // Nepali
151     {  65, "san", smDevanagari }, // Sanskrit
152     //    {  66, "", smDevanagari }, // Marathi ??
153     {  67, "ben", smBengali }, // Bengali
154     {  68, "asm", smBengali }, // Assamese
155     {  69, "guj", smGuriati }, // Gujarati
156     {  70, "pan", smGurmukhi }, // Punjabi
157     {  71, "ori", smOriya }, // Oriya
158     {  72, "mal", smMalayalam }, // Malayalam
159     {  73, "kan", smKannada }, // Kannada
160     {  74, "tam", smTamil }, // Tamil
161     {  75, "tel", smTelugu }, // Telugu
162     {  76, "sin", smSinhalese }, // Sinhalese
163     {  77, "bur", smBurmese }, // Burmese
164     {  78, "khm", smKhmer }, // Khmer
165     {  79, "lao", smLaotian }, // Lao
166     {  80, "vie", smVietnamese }, // Vietnamese
167     {  81, "ind", smRoman }, // Indonesian
168     {  82, "tgl", smRoman }, // Tagalog
169     {  83, "may", smRoman }, // Malay (roman)
170     {  84, "may", smArabic }, // Malay (arabic)
171     {  85, "amh", smEthiopic }, // Amharic
172     {  86, "tir", smEthiopic }, // Tigrinya
173     {  87, "orm", smEthiopic }, // Oromo
174     {  88, "som", smRoman }, // Somali
175     {  89, "swa", smRoman }, // Swahili
176     {  90, "kin", smRoman }, // Kinyarwanda
177     {  91, "run", smRoman }, // Rundi
178     {  92, "nya", smRoman }, // Chewa
179     {  93, "mlg", smRoman }, // Malagasy
180     {  94, "epo", smRoman }, // Esperanto
181     { 128, "wel", smRoman }, // Welsh
182     { 129, "baq", smRoman }, // Basque
183     { 130, "cat", smRoman }, // Catalan
184     { 131, "lat", smRoman }, // Latin
185     { 132, "que", smRoman }, // Quechua
186     { 133, "grn", smRoman }, // Guarani
187     { 134, "aym", smRoman }, // Aymara
188     { 135, "tat", smCyrillic }, // Tatar
189     { 136, "uig", smArabic }, // Uighur
190     { 137, "dzo", smTibetan }, // Dzongkha
191     { 138, "jav", smRoman }, // Javanese (roman)
192   };
193 
194 #define NUM_CODES (sizeof(mac_languages)/sizeof(mac_languages[0]))
195 
/*
 *  Write the 3 character language code of a track (plus the terminating
 *  zero, 4 bytes in total) to ret.
 *
 *  MP4 files: the media header language field is unpacked as three 5 bit
 *  values, each offset by 0x60; this always succeeds.
 *  Other files: the field is looked up as a Macintosh language code in
 *  mac_languages[].
 *
 *  Returns 1 if ret was filled in, 0 if the code is not in the table
 *  (ret is left untouched in that case).
 */
static int get_language(quicktime_trak_t * trak, char * ret,
                        lqt_file_type_t file_type)
  {
  int idx;

  if(IS_MP4(file_type))
    {
    int shift;
    char * out = ret;

    /* Most significant 5 bit group is the first character */
    for(shift = 10; shift >= 0; shift -= 5)
      *out++ = ((trak->mdia.mdhd.language >> shift) & 0x1f) + 0x60;
    *out = '\0';
    return 1;
    }

  for(idx = 0; idx < NUM_CODES; idx++)
    {
    if(mac_languages[idx].mac_code != trak->mdia.mdhd.language)
      continue;
    strcpy(ret, mac_languages[idx].language);
    return 1;
    }
  return 0;
  }
218 
219 static char * unicode_string = LQT_UTF_8_16;
220 
/*
 *  Return the primary character set name for a Macintosh language code.
 *
 *  MP4 files always yield unicode_string, regardless of mac_code.
 *  Otherwise the charset of the matching mac_languages[] entry is
 *  returned; the result is a null pointer if mac_code has no entry or
 *  the entry has no primary charset.
 */
const char * lqt_get_charset(int mac_code, lqt_file_type_t file_type)
  {
  const char * result = (char*)0;
  int idx = 0;

  if(IS_MP4(file_type))
    return unicode_string;

  while(idx < NUM_CODES)
    {
    if(mac_languages[idx].mac_code == mac_code)
      {
      result = mac_languages[idx].cs.charset;
      break;
      }
    idx++;
    }
  return result;
  }
235 
/*
 *  Return the fallback character set name for a Macintosh language code.
 *
 *  MP4 files always yield unicode_string, regardless of mac_code.
 *  Otherwise the fallback charset of the matching mac_languages[] entry
 *  is returned; the result is a null pointer if mac_code has no entry or
 *  the entry has no fallback charset.
 */
const char * lqt_get_charset_fallback(int mac_code, lqt_file_type_t file_type)
  {
  const char * result = (char*)0;
  int idx = 0;

  if(IS_MP4(file_type))
    return unicode_string;

  while(idx < NUM_CODES)
    {
    if(mac_languages[idx].mac_code == mac_code)
      {
      result = mac_languages[idx].cs.charset_fallback;
      break;
      }
    idx++;
    }
  return result;
  }
250 
251 
/*
 *  Store a 3 character language code in the media header of a track.
 *
 *  MP4 files: the first three characters of language are packed into
 *  5 bits each (character value minus 0x60, first character in the most
 *  significant group). Only 'a'..'z' can be represented that way, so
 *  anything else (including a string shorter than 3 characters) is
 *  rejected.
 *  Other files: language is translated to a Macintosh language code via
 *  mac_languages[]; the first entry with a matching string is used.
 *
 *  Returns 1 if the language was stored, 0 if language is NULL, malformed
 *  or unknown. The track is not modified when 0 is returned.
 */
static int set_language_code(quicktime_trak_t * trak,
                             const char * language, lqt_file_type_t file_type)
  {
  int i;

  if(!language)
    return 0;

  if(IS_MP4(file_type))
    {
    int packed = 0;

    for(i = 0; i < 3; i++)
      {
      /* The range check also stops at a premature '\0', so we never read
         past the end of a short string or shift a negative value */
      if((language[i] < 'a') || (language[i] > 'z'))
        return 0;
      packed = (packed << 5) | (language[i] - 0x60);
      }
    trak->mdia.mdhd.language = packed;
    return 1;
    }

  for(i = 0; i < NUM_CODES; i++)
    {
    if(!strcmp(language, mac_languages[i].language))
      {
      trak->mdia.mdhd.language = mac_languages[i].mac_code;
      return 1;
      }
    }
  return 0;
  }
276 
/*
 *  Set the language of an audio track from a 3 character language code.
 *  Does nothing if track is outside 0 .. total_atracks-1. The result of
 *  set_language_code() is not reported to the caller.
 */
void lqt_set_audio_language(quicktime_t * file, int track, const char * language)
  {
  if((track >= 0) && (track < file->total_atracks))
    set_language_code(file->atracks[track].track, language, file->file_type);
  }
283 
/*
 *  Get the 3 character language code of an audio track.
 *  language receives the code via get_language(), which writes 4 bytes
 *  (3 characters plus terminating zero) on success.
 *  Returns 0 if track is outside 0 .. total_atracks-1, otherwise the
 *  result of get_language() (1 on success, 0 if the code is unknown).
 */
int  lqt_get_audio_language(quicktime_t * file, int track, char * language)
  {
  int found = 0;

  if((track >= 0) && (track < file->total_atracks))
    found = get_language(file->atracks[track].track, language, file->file_type);
  return found;
  }
290 
291 
/*
 *  Set the language of a text track from a 3 character language code.
 *  Does nothing if track is outside 0 .. total_ttracks-1. The result of
 *  set_language_code() is not reported to the caller.
 */
void lqt_set_text_language(quicktime_t * file, int track, const char * language)
  {
  if((track >= 0) && (track < file->total_ttracks))
    set_language_code(file->ttracks[track].track, language, file->file_type);
  }
299 
/*
 *  Get the 3 character language code of a text track.
 *  language receives the code via get_language(), which writes 4 bytes
 *  (3 characters plus terminating zero) on success.
 *  Returns 0 if track is outside 0 .. total_ttracks-1, otherwise the
 *  result of get_language() (1 on success, 0 if the code is unknown).
 */
int  lqt_get_text_language(quicktime_t * file, int track, char * language)
  {
  int found = 0;

  if((track >= 0) && (track < file->total_ttracks))
    found = get_language(file->ttracks[track].track, language, file->file_type);
  return found;
  }
306