1#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""
7localize.py -- Generates an output file from the given template replacing
8variables and localizing strings.
9
10The script uses Jinja2 template processing library (src/third_party/jinja2).
11Variables available to the templates:
  - |languages| - the normalized, de-duplicated list of languages passed on
    the command line (as positional arguments).
  - Each NAME=VALUE define ('-d') can be accessed as {{ NAME }}.
14  - |official_build| is set to '1' when CHROME_BUILD_TYPE environment variable
15    is set to "_official".
16
17Filters:
18  - GetCodepage - returns the code page for the given language.
19  - GetCodepageDecimal same as GetCodepage, but returns a decimal value.
20  - GetLangId - returns Win32 LANGID.
  - GetPrimaryLanguage - returns the numeric value of the Win32 primary
    language ID.
  - GetSublanguage - returns the numeric value of the Win32 sublanguage ID.
25
26Globals:
27  - IsRtlLanguage(language) - returns True if the language is right-to-left.
  - SelectLanguage(language) - selects the language to be used by
    {% trans %}{% endtrans %} statements.
30
31"""
32
33import io
34import json
35from optparse import OptionParser
36import os
37import sys
38from string import Template
39
40
# Win32 primary language IDs, keyed by the name of the LANG_* constant.
# GetLangId() combines one of these values with a sublanguage ID from
# _LANGUAGE_SUB to build a LANGID. Several names share a numeric value
# (e.g. LANG_BOSNIAN, LANG_CROATIAN and LANG_SERBIAN are all 0x1a).
_LANGUAGE_PRIMARY = {
  'LANG_NEUTRAL' : 0x00,
  'LANG_INVARIANT' : 0x7f,
  'LANG_AFRIKAANS' : 0x36,
  'LANG_ALBANIAN' : 0x1c,
  'LANG_ALSATIAN' : 0x84,
  'LANG_AMHARIC' : 0x5e,
  'LANG_ARABIC' : 0x01,
  'LANG_ARMENIAN' : 0x2b,
  'LANG_ASSAMESE' : 0x4d,
  'LANG_AZERI' : 0x2c,
  'LANG_BASHKIR' : 0x6d,
  'LANG_BASQUE' : 0x2d,
  'LANG_BELARUSIAN' : 0x23,
  'LANG_BENGALI' : 0x45,
  'LANG_BRETON' : 0x7e,
  'LANG_BOSNIAN' : 0x1a,
  'LANG_BULGARIAN' : 0x02,
  'LANG_CATALAN' : 0x03,
  'LANG_CHINESE' : 0x04,
  'LANG_CORSICAN' : 0x83,
  'LANG_CROATIAN' : 0x1a,
  'LANG_CZECH' : 0x05,
  'LANG_DANISH' : 0x06,
  'LANG_DARI' : 0x8c,
  'LANG_DIVEHI' : 0x65,
  'LANG_DUTCH' : 0x13,
  'LANG_ENGLISH' : 0x09,
  'LANG_ESTONIAN' : 0x25,
  'LANG_FAEROESE' : 0x38,
  'LANG_FILIPINO' : 0x64,
  'LANG_FINNISH' : 0x0b,
  'LANG_FRENCH' : 0x0c,
  'LANG_FRISIAN' : 0x62,
  'LANG_GALICIAN' : 0x56,
  'LANG_GEORGIAN' : 0x37,
  'LANG_GERMAN' : 0x07,
  'LANG_GREEK' : 0x08,
  'LANG_GREENLANDIC' : 0x6f,
  'LANG_GUJARATI' : 0x47,
  'LANG_HAUSA' : 0x68,
  'LANG_HEBREW' : 0x0d,
  'LANG_HINDI' : 0x39,
  'LANG_HUNGARIAN' : 0x0e,
  'LANG_ICELANDIC' : 0x0f,
  'LANG_IGBO' : 0x70,
  'LANG_INDONESIAN' : 0x21,
  'LANG_INUKTITUT' : 0x5d,
  'LANG_IRISH' : 0x3c,
  'LANG_ITALIAN' : 0x10,
  'LANG_JAPANESE' : 0x11,
  'LANG_KANNADA' : 0x4b,
  'LANG_KASHMIRI' : 0x60,
  'LANG_KAZAK' : 0x3f,
  'LANG_KHMER' : 0x53,
  'LANG_KICHE' : 0x86,
  'LANG_KINYARWANDA' : 0x87,
  'LANG_KONKANI' : 0x57,
  'LANG_KOREAN' : 0x12,
  'LANG_KYRGYZ' : 0x40,
  'LANG_LAO' : 0x54,
  'LANG_LATVIAN' : 0x26,
  'LANG_LITHUANIAN' : 0x27,
  'LANG_LOWER_SORBIAN' : 0x2e,
  'LANG_LUXEMBOURGISH' : 0x6e,
  'LANG_MACEDONIAN' : 0x2f,
  'LANG_MALAY' : 0x3e,
  'LANG_MALAYALAM' : 0x4c,
  'LANG_MALTESE' : 0x3a,
  'LANG_MANIPURI' : 0x58,
  'LANG_MAORI' : 0x81,
  'LANG_MAPUDUNGUN' : 0x7a,
  'LANG_MARATHI' : 0x4e,
  'LANG_MOHAWK' : 0x7c,
  'LANG_MONGOLIAN' : 0x50,
  'LANG_NEPALI' : 0x61,
  'LANG_NORWEGIAN' : 0x14,
  'LANG_OCCITAN' : 0x82,
  'LANG_ORIYA' : 0x48,
  'LANG_PASHTO' : 0x63,
  'LANG_PERSIAN' : 0x29,
  'LANG_POLISH' : 0x15,
  'LANG_PORTUGUESE' : 0x16,
  'LANG_PUNJABI' : 0x46,
  'LANG_QUECHUA' : 0x6b,
  'LANG_ROMANIAN' : 0x18,
  'LANG_ROMANSH' : 0x17,
  'LANG_RUSSIAN' : 0x19,
  'LANG_SAMI' : 0x3b,
  'LANG_SANSKRIT' : 0x4f,
  'LANG_SCOTTISH_GAELIC' : 0x91,
  'LANG_SERBIAN' : 0x1a,
  'LANG_SINDHI' : 0x59,
  'LANG_SINHALESE' : 0x5b,
  'LANG_SLOVAK' : 0x1b,
  'LANG_SLOVENIAN' : 0x24,
  'LANG_SOTHO' : 0x6c,
  'LANG_SPANISH' : 0x0a,
  'LANG_SWAHILI' : 0x41,
  'LANG_SWEDISH' : 0x1d,
  'LANG_SYRIAC' : 0x5a,
  'LANG_TAJIK' : 0x28,
  'LANG_TAMAZIGHT' : 0x5f,
  'LANG_TAMIL' : 0x49,
  'LANG_TATAR' : 0x44,
  'LANG_TELUGU' : 0x4a,
  'LANG_THAI' : 0x1e,
  'LANG_TIBETAN' : 0x51,
  'LANG_TIGRIGNA' : 0x73,
  'LANG_TSWANA' : 0x32,
  'LANG_TURKISH' : 0x1f,
  'LANG_TURKMEN' : 0x42,
  'LANG_UIGHUR' : 0x80,
  'LANG_UKRAINIAN' : 0x22,
  'LANG_UPPER_SORBIAN' : 0x2e,
  'LANG_URDU' : 0x20,
  'LANG_UZBEK' : 0x43,
  'LANG_VIETNAMESE' : 0x2a,
  'LANG_WELSH' : 0x52,
  'LANG_WOLOF' : 0x88,
  'LANG_XHOSA' : 0x34,
  'LANG_YAKUT' : 0x85,
  'LANG_YI' : 0x78,
  'LANG_YORUBA' : 0x6a,
  'LANG_ZULU' : 0x35,
}
168
169
# Win32 sublanguage IDs, keyed by the name of the SUBLANG_* constant.
# The numeric values are only meaningful together with a primary language, so
# the same number appears under many names. GetLangId() shifts the value left
# by 10 bits before OR-ing it with the primary language ID.
_LANGUAGE_SUB = {
  'SUBLANG_NEUTRAL' : 0x00,
  'SUBLANG_DEFAULT' : 0x01,
  'SUBLANG_SYS_DEFAULT' : 0x02,
  'SUBLANG_CUSTOM_DEFAULT' : 0x03,
  'SUBLANG_CUSTOM_UNSPECIFIED' : 0x04,
  'SUBLANG_UI_CUSTOM_DEFAULT' : 0x05,
  'SUBLANG_AFRIKAANS_SOUTH_AFRICA' : 0x01,
  'SUBLANG_ALBANIAN_ALBANIA' : 0x01,
  'SUBLANG_ALSATIAN_FRANCE' : 0x01,
  'SUBLANG_AMHARIC_ETHIOPIA' : 0x01,
  'SUBLANG_ARABIC_SAUDI_ARABIA' : 0x01,
  'SUBLANG_ARABIC_IRAQ' : 0x02,
  'SUBLANG_ARABIC_EGYPT' : 0x03,
  'SUBLANG_ARABIC_LIBYA' : 0x04,
  'SUBLANG_ARABIC_ALGERIA' : 0x05,
  'SUBLANG_ARABIC_MOROCCO' : 0x06,
  'SUBLANG_ARABIC_TUNISIA' : 0x07,
  'SUBLANG_ARABIC_OMAN' : 0x08,
  'SUBLANG_ARABIC_YEMEN' : 0x09,
  'SUBLANG_ARABIC_SYRIA' : 0x0a,
  'SUBLANG_ARABIC_JORDAN' : 0x0b,
  'SUBLANG_ARABIC_LEBANON' : 0x0c,
  'SUBLANG_ARABIC_KUWAIT' : 0x0d,
  'SUBLANG_ARABIC_UAE' : 0x0e,
  'SUBLANG_ARABIC_BAHRAIN' : 0x0f,
  'SUBLANG_ARABIC_QATAR' : 0x10,
  'SUBLANG_ARMENIAN_ARMENIA' : 0x01,
  'SUBLANG_ASSAMESE_INDIA' : 0x01,
  'SUBLANG_AZERI_LATIN' : 0x01,
  'SUBLANG_AZERI_CYRILLIC' : 0x02,
  'SUBLANG_BASHKIR_RUSSIA' : 0x01,
  'SUBLANG_BASQUE_BASQUE' : 0x01,
  'SUBLANG_BELARUSIAN_BELARUS' : 0x01,
  'SUBLANG_BENGALI_INDIA' : 0x01,
  'SUBLANG_BENGALI_BANGLADESH' : 0x02,
  'SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_LATIN' : 0x05,
  'SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC' : 0x08,
  'SUBLANG_BRETON_FRANCE' : 0x01,
  'SUBLANG_BULGARIAN_BULGARIA' : 0x01,
  'SUBLANG_CATALAN_CATALAN' : 0x01,
  'SUBLANG_CHINESE_TRADITIONAL' : 0x01,
  'SUBLANG_CHINESE_SIMPLIFIED' : 0x02,
  'SUBLANG_CHINESE_HONGKONG' : 0x03,
  'SUBLANG_CHINESE_SINGAPORE' : 0x04,
  'SUBLANG_CHINESE_MACAU' : 0x05,
  'SUBLANG_CORSICAN_FRANCE' : 0x01,
  'SUBLANG_CZECH_CZECH_REPUBLIC' : 0x01,
  'SUBLANG_CROATIAN_CROATIA' : 0x01,
  'SUBLANG_CROATIAN_BOSNIA_HERZEGOVINA_LATIN' : 0x04,
  'SUBLANG_DANISH_DENMARK' : 0x01,
  'SUBLANG_DARI_AFGHANISTAN' : 0x01,
  'SUBLANG_DIVEHI_MALDIVES' : 0x01,
  'SUBLANG_DUTCH' : 0x01,
  'SUBLANG_DUTCH_BELGIAN' : 0x02,
  'SUBLANG_ENGLISH_US' : 0x01,
  'SUBLANG_ENGLISH_UK' : 0x02,
  'SUBLANG_ENGLISH_AUS' : 0x03,
  'SUBLANG_ENGLISH_CAN' : 0x04,
  'SUBLANG_ENGLISH_NZ' : 0x05,
  'SUBLANG_ENGLISH_EIRE' : 0x06,
  'SUBLANG_ENGLISH_SOUTH_AFRICA' : 0x07,
  'SUBLANG_ENGLISH_JAMAICA' : 0x08,
  'SUBLANG_ENGLISH_CARIBBEAN' : 0x09,
  'SUBLANG_ENGLISH_BELIZE' : 0x0a,
  'SUBLANG_ENGLISH_TRINIDAD' : 0x0b,
  'SUBLANG_ENGLISH_ZIMBABWE' : 0x0c,
  'SUBLANG_ENGLISH_PHILIPPINES' : 0x0d,
  'SUBLANG_ENGLISH_INDIA' : 0x10,
  'SUBLANG_ENGLISH_MALAYSIA' : 0x11,
  'SUBLANG_ENGLISH_SINGAPORE' : 0x12,
  'SUBLANG_ESTONIAN_ESTONIA' : 0x01,
  'SUBLANG_FAEROESE_FAROE_ISLANDS' : 0x01,
  'SUBLANG_FILIPINO_PHILIPPINES' : 0x01,
  'SUBLANG_FINNISH_FINLAND' : 0x01,
  'SUBLANG_FRENCH' : 0x01,
  'SUBLANG_FRENCH_BELGIAN' : 0x02,
  'SUBLANG_FRENCH_CANADIAN' : 0x03,
  'SUBLANG_FRENCH_SWISS' : 0x04,
  'SUBLANG_FRENCH_LUXEMBOURG' : 0x05,
  'SUBLANG_FRENCH_MONACO' : 0x06,
  'SUBLANG_FRISIAN_NETHERLANDS' : 0x01,
  'SUBLANG_GALICIAN_GALICIAN' : 0x01,
  'SUBLANG_GEORGIAN_GEORGIA' : 0x01,
  'SUBLANG_GERMAN' : 0x01,
  'SUBLANG_GERMAN_SWISS' : 0x02,
  'SUBLANG_GERMAN_AUSTRIAN' : 0x03,
  'SUBLANG_GERMAN_LUXEMBOURG' : 0x04,
  'SUBLANG_GERMAN_LIECHTENSTEIN' : 0x05,
  'SUBLANG_GREEK_GREECE' : 0x01,
  'SUBLANG_GREENLANDIC_GREENLAND' : 0x01,
  'SUBLANG_GUJARATI_INDIA' : 0x01,
  'SUBLANG_HAUSA_NIGERIA_LATIN' : 0x01,
  'SUBLANG_HEBREW_ISRAEL' : 0x01,
  'SUBLANG_HINDI_INDIA' : 0x01,
  'SUBLANG_HUNGARIAN_HUNGARY' : 0x01,
  'SUBLANG_ICELANDIC_ICELAND' : 0x01,
  'SUBLANG_IGBO_NIGERIA' : 0x01,
  'SUBLANG_INDONESIAN_INDONESIA' : 0x01,
  'SUBLANG_INUKTITUT_CANADA' : 0x01,
  'SUBLANG_INUKTITUT_CANADA_LATIN' : 0x02,
  'SUBLANG_IRISH_IRELAND' : 0x02,
  'SUBLANG_ITALIAN' : 0x01,
  'SUBLANG_ITALIAN_SWISS' : 0x02,
  'SUBLANG_JAPANESE_JAPAN' : 0x01,
  'SUBLANG_KANNADA_INDIA' : 0x01,
  'SUBLANG_KASHMIRI_SASIA' : 0x02,
  'SUBLANG_KASHMIRI_INDIA' : 0x02,
  'SUBLANG_KAZAK_KAZAKHSTAN' : 0x01,
  'SUBLANG_KHMER_CAMBODIA' : 0x01,
  'SUBLANG_KICHE_GUATEMALA' : 0x01,
  'SUBLANG_KINYARWANDA_RWANDA' : 0x01,
  'SUBLANG_KONKANI_INDIA' : 0x01,
  'SUBLANG_KOREAN' : 0x01,
  'SUBLANG_KYRGYZ_KYRGYZSTAN' : 0x01,
  'SUBLANG_LAO_LAO' : 0x01,
  'SUBLANG_LATVIAN_LATVIA' : 0x01,
  'SUBLANG_LITHUANIAN' : 0x01,
  'SUBLANG_LOWER_SORBIAN_GERMANY' : 0x02,
  'SUBLANG_LUXEMBOURGISH_LUXEMBOURG' : 0x01,
  'SUBLANG_MACEDONIAN_MACEDONIA' : 0x01,
  'SUBLANG_MALAY_MALAYSIA' : 0x01,
  'SUBLANG_MALAY_BRUNEI_DARUSSALAM' : 0x02,
  'SUBLANG_MALAYALAM_INDIA' : 0x01,
  'SUBLANG_MALTESE_MALTA' : 0x01,
  'SUBLANG_MAORI_NEW_ZEALAND' : 0x01,
  'SUBLANG_MAPUDUNGUN_CHILE' : 0x01,
  'SUBLANG_MARATHI_INDIA' : 0x01,
  'SUBLANG_MOHAWK_MOHAWK' : 0x01,
  'SUBLANG_MONGOLIAN_CYRILLIC_MONGOLIA' : 0x01,
  'SUBLANG_MONGOLIAN_PRC' : 0x02,
  'SUBLANG_NEPALI_INDIA' : 0x02,
  'SUBLANG_NEPALI_NEPAL' : 0x01,
  'SUBLANG_NORWEGIAN_BOKMAL' : 0x01,
  'SUBLANG_NORWEGIAN_NYNORSK' : 0x02,
  'SUBLANG_OCCITAN_FRANCE' : 0x01,
  'SUBLANG_ORIYA_INDIA' : 0x01,
  'SUBLANG_PASHTO_AFGHANISTAN' : 0x01,
  'SUBLANG_PERSIAN_IRAN' : 0x01,
  'SUBLANG_POLISH_POLAND' : 0x01,
  'SUBLANG_PORTUGUESE' : 0x02,
  'SUBLANG_PORTUGUESE_BRAZILIAN' : 0x01,
  'SUBLANG_PUNJABI_INDIA' : 0x01,
  'SUBLANG_QUECHUA_BOLIVIA' : 0x01,
  'SUBLANG_QUECHUA_ECUADOR' : 0x02,
  'SUBLANG_QUECHUA_PERU' : 0x03,
  'SUBLANG_ROMANIAN_ROMANIA' : 0x01,
  'SUBLANG_ROMANSH_SWITZERLAND' : 0x01,
  'SUBLANG_RUSSIAN_RUSSIA' : 0x01,
  'SUBLANG_SAMI_NORTHERN_NORWAY' : 0x01,
  'SUBLANG_SAMI_NORTHERN_SWEDEN' : 0x02,
  'SUBLANG_SAMI_NORTHERN_FINLAND' : 0x03,
  'SUBLANG_SAMI_LULE_NORWAY' : 0x04,
  'SUBLANG_SAMI_LULE_SWEDEN' : 0x05,
  'SUBLANG_SAMI_SOUTHERN_NORWAY' : 0x06,
  'SUBLANG_SAMI_SOUTHERN_SWEDEN' : 0x07,
  'SUBLANG_SAMI_SKOLT_FINLAND' : 0x08,
  'SUBLANG_SAMI_INARI_FINLAND' : 0x09,
  'SUBLANG_SANSKRIT_INDIA' : 0x01,
  'SUBLANG_SCOTTISH_GAELIC' : 0x01,
  'SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_LATIN' : 0x06,
  'SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_CYRILLIC' : 0x07,
  'SUBLANG_SERBIAN_MONTENEGRO_LATIN' : 0x0b,
  'SUBLANG_SERBIAN_MONTENEGRO_CYRILLIC' : 0x0c,
  'SUBLANG_SERBIAN_SERBIA_LATIN' : 0x09,
  'SUBLANG_SERBIAN_SERBIA_CYRILLIC' : 0x0a,
  'SUBLANG_SERBIAN_CROATIA' : 0x01,
  'SUBLANG_SERBIAN_LATIN' : 0x02,
  'SUBLANG_SERBIAN_CYRILLIC' : 0x03,
  'SUBLANG_SINDHI_INDIA' : 0x01,
  'SUBLANG_SINDHI_PAKISTAN' : 0x02,
  'SUBLANG_SINDHI_AFGHANISTAN' : 0x02,
  'SUBLANG_SINHALESE_SRI_LANKA' : 0x01,
  'SUBLANG_SOTHO_NORTHERN_SOUTH_AFRICA' : 0x01,
  'SUBLANG_SLOVAK_SLOVAKIA' : 0x01,
  'SUBLANG_SLOVENIAN_SLOVENIA' : 0x01,
  'SUBLANG_SPANISH' : 0x01,
  'SUBLANG_SPANISH_MEXICAN' : 0x02,
  'SUBLANG_SPANISH_MODERN' : 0x03,
  'SUBLANG_SPANISH_GUATEMALA' : 0x04,
  'SUBLANG_SPANISH_COSTA_RICA' : 0x05,
  'SUBLANG_SPANISH_PANAMA' : 0x06,
  'SUBLANG_SPANISH_DOMINICAN_REPUBLIC' : 0x07,
  'SUBLANG_SPANISH_VENEZUELA' : 0x08,
  'SUBLANG_SPANISH_COLOMBIA' : 0x09,
  'SUBLANG_SPANISH_PERU' : 0x0a,
  'SUBLANG_SPANISH_ARGENTINA' : 0x0b,
  'SUBLANG_SPANISH_ECUADOR' : 0x0c,
  'SUBLANG_SPANISH_CHILE' : 0x0d,
  'SUBLANG_SPANISH_URUGUAY' : 0x0e,
  'SUBLANG_SPANISH_PARAGUAY' : 0x0f,
  'SUBLANG_SPANISH_BOLIVIA' : 0x10,
  'SUBLANG_SPANISH_EL_SALVADOR' : 0x11,
  'SUBLANG_SPANISH_HONDURAS' : 0x12,
  'SUBLANG_SPANISH_NICARAGUA' : 0x13,
  'SUBLANG_SPANISH_PUERTO_RICO' : 0x14,
  'SUBLANG_SPANISH_US' : 0x15,
  'SUBLANG_SWAHILI_KENYA' : 0x01,
  'SUBLANG_SWEDISH' : 0x01,
  'SUBLANG_SWEDISH_FINLAND' : 0x02,
  'SUBLANG_SYRIAC_SYRIA' : 0x01,
  'SUBLANG_TAJIK_TAJIKISTAN' : 0x01,
  'SUBLANG_TAMAZIGHT_ALGERIA_LATIN' : 0x02,
  'SUBLANG_TAMIL_INDIA' : 0x01,
  'SUBLANG_TATAR_RUSSIA' : 0x01,
  'SUBLANG_TELUGU_INDIA' : 0x01,
  'SUBLANG_THAI_THAILAND' : 0x01,
  'SUBLANG_TIBETAN_PRC' : 0x01,
  'SUBLANG_TIGRIGNA_ERITREA' : 0x02,
  'SUBLANG_TSWANA_SOUTH_AFRICA' : 0x01,
  'SUBLANG_TURKISH_TURKEY' : 0x01,
  'SUBLANG_TURKMEN_TURKMENISTAN' : 0x01,
  'SUBLANG_UIGHUR_PRC' : 0x01,
  'SUBLANG_UKRAINIAN_UKRAINE' : 0x01,
  'SUBLANG_UPPER_SORBIAN_GERMANY' : 0x01,
  'SUBLANG_URDU_PAKISTAN' : 0x01,
  'SUBLANG_URDU_INDIA' : 0x02,
  'SUBLANG_UZBEK_LATIN' : 0x01,
  'SUBLANG_UZBEK_CYRILLIC' : 0x02,
  'SUBLANG_VIETNAMESE_VIETNAM' : 0x01,
  'SUBLANG_WELSH_UNITED_KINGDOM' : 0x01,
  'SUBLANG_WOLOF_SENEGAL' : 0x01,
  'SUBLANG_XHOSA_SOUTH_AFRICA' : 0x01,
  'SUBLANG_YAKUT_RUSSIA' : 0x01,
  'SUBLANG_YI_PRC' : 0x01,
  'SUBLANG_YORUBA_NIGERIA' : 0x01,
  'SUBLANG_ZULU_SOUTH_AFRICA' : 0x01,
}
399
400
401'''
This dictionary defines the language lookup table. The key is the language
code (optionally followed by a region suffix, e.g. 'pt-BR'), and the value
specifies the corresponding code page, primary language and sublanguage.
405
406LCID resource: http://msdn.microsoft.com/en-us/library/ms776294.aspx
407Codepage resource: http://www.science.co.il/language/locale-codes.asp
408Language ID resource: http://msdn.microsoft.com/en-us/library/ms776294.aspx
409
410There is no appropriate sublang for Spanish (Latin America) [es-419], so we
411use Mexico. SUBLANG_DEFAULT would incorrectly map to Spain. Unlike other
412Latin American countries, Mexican Spanish is supported by VERSIONINFO:
413http://msdn.microsoft.com/en-us/library/aa381058.aspx
414
415'''
# Each value is a three-element list:
#   [0] the code page number (formatted by GetCodepage/GetCodepageDecimal),
#   [1] a key of _LANGUAGE_PRIMARY,
#   [2] a key of _LANGUAGE_SUB.
_LANGUAGE_MAP = {
  # Language neutral LCID, unicode(1200) code page.
  'neutral' : [ 1200, 'LANG_NEUTRAL', 'SUBLANG_NEUTRAL' ],
  # LANG_USER_DEFAULT LCID, unicode(1200) code page.
  'userdefault' : [ 1200, 'LANG_NEUTRAL', 'SUBLANG_DEFAULT' ],
  'fake-bidi' : [ 1255, 'LANG_HEBREW', 'SUBLANG_NEUTRAL' ],
  'af' : [ 1252, 'LANG_AFRIKAANS', 'SUBLANG_DEFAULT' ],
  'am' : [ 1200, 'LANG_AMHARIC', 'SUBLANG_DEFAULT' ],
  'ar' : [ 1256, 'LANG_ARABIC', 'SUBLANG_DEFAULT' ],
  'bg' : [ 1251, 'LANG_BULGARIAN', 'SUBLANG_DEFAULT' ],
  'bn' : [ 1200, 'LANG_BENGALI', 'SUBLANG_DEFAULT' ],
  'ca' : [ 1252, 'LANG_CATALAN', 'SUBLANG_DEFAULT' ],
  'cs' : [ 1250, 'LANG_CZECH', 'SUBLANG_DEFAULT' ],
  'da' : [ 1252, 'LANG_DANISH', 'SUBLANG_DEFAULT' ],
  'de' : [ 1252, 'LANG_GERMAN', 'SUBLANG_GERMAN' ],
  'el' : [ 1253, 'LANG_GREEK', 'SUBLANG_DEFAULT' ],
  'en' : [ 1200, 'LANG_ENGLISH', 'SUBLANG_ENGLISH_US' ],
  # NOTE(review): 1038 differs from the 1252 used by the other Western
  # European entries in this table -- confirm this value is intended.
  'en-GB' : [ 1038, 'LANG_ENGLISH', 'SUBLANG_ENGLISH_UK' ],
  'es' : [ 1252, 'LANG_SPANISH', 'SUBLANG_SPANISH_MODERN' ],
  # LCID for Mexico; Windows does not support L.A. LCID.
  'es-419' : [ 1252, 'LANG_SPANISH', 'SUBLANG_SPANISH_MEXICAN' ],
  'et' : [ 1257, 'LANG_ESTONIAN', 'SUBLANG_DEFAULT' ],
  'eu' : [ 1252, 'LANG_BASQUE', 'SUBLANG_DEFAULT' ],
  'fa' : [ 1256, 'LANG_PERSIAN', 'SUBLANG_DEFAULT' ],
  'fi' : [ 1252, 'LANG_FINNISH', 'SUBLANG_DEFAULT' ],
  'fil' : [ 1252, 'LANG_FILIPINO', 'SUBLANG_DEFAULT' ],
  'fr' : [ 1252, 'LANG_FRENCH', 'SUBLANG_FRENCH' ],
  'fr-CA' : [ 1252, 'LANG_FRENCH', 'SUBLANG_FRENCH_CANADIAN' ],
  'gl' : [ 1252, 'LANG_GALICIAN', 'SUBLANG_DEFAULT' ],
  'gu' : [ 1200, 'LANG_GUJARATI', 'SUBLANG_DEFAULT' ],
  'he' : [ 1255, 'LANG_HEBREW', 'SUBLANG_DEFAULT' ],
  'hi' : [ 1200, 'LANG_HINDI', 'SUBLANG_DEFAULT' ],
  # NOTE(review): other Central European entries (cs, hu, pl, sk, sl) use
  # 1250 -- confirm 1252 is intended here.
  'hr' : [ 1252, 'LANG_CROATIAN', 'SUBLANG_DEFAULT' ],
  'hu' : [ 1250, 'LANG_HUNGARIAN', 'SUBLANG_DEFAULT' ],
  'id' : [ 1252, 'LANG_INDONESIAN', 'SUBLANG_DEFAULT' ],
  'is' : [ 1252, 'LANG_ICELANDIC', 'SUBLANG_DEFAULT' ],
  'it' : [ 1252, 'LANG_ITALIAN', 'SUBLANG_DEFAULT' ],
  # 'iw' maps to the same entry as 'he'.
  'iw' : [ 1255, 'LANG_HEBREW', 'SUBLANG_DEFAULT' ],
  'ja' : [ 932, 'LANG_JAPANESE', 'SUBLANG_DEFAULT' ],
  'kn' : [ 1200, 'LANG_KANNADA', 'SUBLANG_DEFAULT' ],
  'ko' : [ 949, 'LANG_KOREAN', 'SUBLANG_KOREAN' ],
  'lt' : [ 1257, 'LANG_LITHUANIAN', 'SUBLANG_LITHUANIAN' ],
  'lv' : [ 1257, 'LANG_LATVIAN', 'SUBLANG_DEFAULT' ],
  'ml' : [ 1200, 'LANG_MALAYALAM', 'SUBLANG_DEFAULT' ],
  'mr' : [ 1200, 'LANG_MARATHI', 'SUBLANG_DEFAULT' ],
  # Malay (Malaysia) [ms-MY]
  'ms' : [ 1252, 'LANG_MALAY', 'SUBLANG_DEFAULT' ],
  'nb' : [ 1252, 'LANG_NORWEGIAN', 'SUBLANG_NORWEGIAN_BOKMAL' ],
  'ne' : [ 1200, 'LANG_NEPALI', 'SUBLANG_NEPALI_NEPAL' ],
  'nl' : [ 1252, 'LANG_DUTCH', 'SUBLANG_DEFAULT' ],
  'nn' : [ 1252, 'LANG_NORWEGIAN', 'SUBLANG_NORWEGIAN_NYNORSK' ],
  'no' : [ 1252, 'LANG_NORWEGIAN', 'SUBLANG_DEFAULT' ],
  'or' : [ 1200, 'LANG_ORIYA', 'SUBLANG_DEFAULT' ],
  'pa' : [ 1200, 'LANG_PUNJABI', 'SUBLANG_PUNJABI_INDIA' ],
  'pl' : [ 1250, 'LANG_POLISH', 'SUBLANG_DEFAULT' ],
  # SUBLANG_DEFAULT has the same value (0x01) as SUBLANG_PORTUGUESE_BRAZILIAN.
  'pt-BR' : [ 1252, 'LANG_PORTUGUESE', 'SUBLANG_DEFAULT' ],
  'pt-PT' : [ 1252, 'LANG_PORTUGUESE', 'SUBLANG_PORTUGUESE' ],
  'ro' : [ 1250, 'LANG_ROMANIAN', 'SUBLANG_DEFAULT' ],
  'ru' : [ 1251, 'LANG_RUSSIAN', 'SUBLANG_DEFAULT' ],
  'sa' : [ 1200, 'LANG_SANSKRIT', 'SUBLANG_SANSKRIT_INDIA' ],
  'si' : [ 1200, 'LANG_SINHALESE', 'SUBLANG_SINHALESE_SRI_LANKA' ],
  'sk' : [ 1250, 'LANG_SLOVAK', 'SUBLANG_DEFAULT' ],
  'sl' : [ 1250, 'LANG_SLOVENIAN', 'SUBLANG_DEFAULT' ],
  'sr' : [ 1250, 'LANG_SERBIAN', 'SUBLANG_SERBIAN_LATIN' ],
  'sv' : [ 1252, 'LANG_SWEDISH', 'SUBLANG_SWEDISH' ],
  'sw' : [ 1252, 'LANG_SWAHILI', 'SUBLANG_DEFAULT' ],
  'ta' : [ 1200, 'LANG_TAMIL', 'SUBLANG_DEFAULT' ],
  'te' : [ 1200, 'LANG_TELUGU', 'SUBLANG_DEFAULT' ],
  'th' : [ 874, 'LANG_THAI', 'SUBLANG_DEFAULT' ],
  'ti' : [ 1200, 'LANG_TIGRIGNA', 'SUBLANG_TIGRIGNA_ERITREA' ],
  'tr' : [ 1254, 'LANG_TURKISH', 'SUBLANG_DEFAULT' ],
  'uk' : [ 1251, 'LANG_UKRAINIAN', 'SUBLANG_DEFAULT' ],
  'ur' : [ 1200, 'LANG_URDU', 'SUBLANG_DEFAULT' ],
  'vi' : [ 1258, 'LANG_VIETNAMESE', 'SUBLANG_DEFAULT' ],
  'zh-CN' : [ 936, 'LANG_CHINESE', 'SUBLANG_CHINESE_SIMPLIFIED' ],
  'zh-HK' : [ 950, 'LANG_CHINESE', 'SUBLANG_CHINESE_HONGKONG' ],
  'zh-TW' : [ 950, 'LANG_CHINESE', 'SUBLANG_CHINESE_TRADITIONAL' ],
  'zu' : [ 1200, 'LANG_ZULU', 'SUBLANG_DEFAULT' ],
}
495
496
497# Right-To-Left languages
498_RTL_LANGUAGES = (
499  'ar',  # Arabic
500  'fa',  # Farsi
501  'iw',  # Hebrew
502  'ks',  # Kashmiri
503  'ku',  # Kurdish
504  'ps',  # Pashto
505  'ur',  # Urdu
506  'yi',  # Yiddish
507)
508
509
def GetCodepage(language):
  """ Returns the code page of |language| as a zero-padded hex string.

  The value is padded to at least four hex digits. Raises KeyError when
  |language| is not a key of _LANGUAGE_MAP.
  """
  code_page = _LANGUAGE_MAP[language][0]
  return "{0:04x}".format(code_page)
514
515
def GetCodepageDecimal(language):
  """ Returns the code page of |language| as a decimal string.

  Raises KeyError when |language| is not a key of _LANGUAGE_MAP.
  """
  code_page = _LANGUAGE_MAP[language][0]
  return "{0:d}".format(code_page)
520
521
def GetLangId(language):
  """ Returns the language id of |language| as a zero-padded hex string.

  The id is the primary language ID OR-ed with the sublanguage ID shifted
  left by 10 bits.
  """
  entry = _LANGUAGE_MAP[language]
  primary_id = _LANGUAGE_PRIMARY[entry[1]]
  sub_id = _LANGUAGE_SUB[entry[2]]
  lang_id = primary_id | (sub_id << 10)
  return "%04x" % lang_id
526
527
def GetPrimaryLanguage(language):
  """ Returns the numeric primary language ID for the given |language|. """
  primary_name = _LANGUAGE_MAP[language][1]
  return _LANGUAGE_PRIMARY[primary_name]
532
533
def GetSublanguage(language):
  """ Returns the numeric sublanguage ID for the given |language|. """
  sublanguage_name = _LANGUAGE_MAP[language][2]
  return _LANGUAGE_SUB[sublanguage_name]
538
539
def IsRtlLanguage(language):
  """ Returns True if |language| is listed in _RTL_LANGUAGES. """
  return language in _RTL_LANGUAGES
542
543
def NormalizeLanguageCode(language):
  """ Returns |language| in the form used as a key by this script.

  The first '_' is replaced with '-', and 'en-US' is shortened to 'en'.
  """
  normalized = language.replace('_', '-', 1)
  return 'en' if normalized == 'en-US' else normalized
549
550
def GetDataPackageSuffix(language):
  """ Returns the value stored as |pak_suffix| for |language|.

  This is the normalized language code, except that 'en' is expanded to
  'en-US'.
  """
  normalized = NormalizeLanguageCode(language)
  return 'en-US' if normalized == 'en' else normalized
556
557
def GetJsonSuffix(language):
  """ Returns |language| with its first '-' replaced by '_'.

  The result is used as the per-language directory name that holds
  messages.json.
  """
  return '_'.join(language.split('-', 1))
560
561
def ReadValuesFromFile(values_dict, file_name):
  """
  Reads NAME=VALUE settings from the specified file into |values_dict|.

  Everything to the left of the first '=' is the keyword,
  everything to the right is the value.  No stripping of
  white space (other than the line terminator), so beware.

  The file must exist, otherwise you get the Python exception from open().
  A line without '=' raises ValueError.
  """
  # Use a context manager so the file is closed even if a line is malformed.
  with open(file_name, 'r') as values_file:
    for line in values_file:
      key, val = line.rstrip('\r\n').split('=', 1)
      values_dict[key] = val
575
576
def ReadMessagesFromFile(file_name):
  """
  Reads messages from a 'chrome_messages_json' file.

  The file is decoded as UTF-8 (a leading BOM is accepted). Returns a
  dictionary mapping each message name to the text of its 'message' field.

  The file must exist, otherwise you get the Python exception from open().
  """
  # |unicode| only exists on Python 2; fall back to |str| on Python 3.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  # The context manager closes the file even if the JSON is malformed.
  with io.open(file_name, encoding='utf-8-sig') as messages_file:
    messages = json.load(messages_file)

  values = {}
  for key, entry in messages.items():
    values[key] = text_type(entry['message'])
  return values
591
592
def WriteIfChanged(file_name, contents, encoding):
  """
  Writes the specified contents to the specified file_name
  iff the contents are different than the current contents.

  The existing file is decoded with |encoding| -- the same encoding used for
  writing -- so that an unchanged file compares equal regardless of the
  platform's default encoding. A file that cannot be decoded with |encoding|
  is treated as changed and replaced.
  """
  try:
    with io.open(file_name, 'r', encoding=encoding) as target:
      old_contents = target.read()
  except EnvironmentError:
    # The file is missing or unreadable; just try to (re)create it below.
    pass
  except UnicodeError:
    # The existing file is not valid |encoding| text: replace it.
    # UnicodeError also covers the missing-BOM error of the UTF-16 decoder.
    os.unlink(file_name)
  else:
    if contents == old_contents:
      return
    os.unlink(file_name)
  # Close the output explicitly so the data is flushed before returning.
  with io.open(file_name, 'w', encoding=encoding) as output:
    output.write(contents)
612
613
class MessageMap(object):
  """ Provides a dictionary of localized messages for each language.

  |message_map| maps a language code to a {message name: text} dictionary and
  |language| holds the currently selected language code (None until
  SelectLanguage() is called).
  """
  def __init__(self, languages, locale_dir):
    """ Loads <locale_dir>/<json suffix>/messages.json for each language.

    Nothing is loaded when |locale_dir| is empty or None.
    """
    self.language = None
    self.message_map = {}

    # Populate the message map
    if locale_dir:
      for language in languages:
        file_name = os.path.join(locale_dir,
                                 GetJsonSuffix(language),
                                 'messages.json')
        self.message_map[language] = ReadMessagesFromFile(file_name)

  def GetText(self, message):
    """ Returns a localized message for the current language.

    Falls back to the 'en' message when the current language, or the message
    within it, is missing. Raises KeyError if 'en' has no such message either.
    """
    try:
      return self.message_map[self.language][message]
    except KeyError:
      # Only a missing language/message triggers the fallback; any other
      # error is a real problem and must not be hidden.
      return self.message_map['en'][message]

  def SelectLanguage(self, language):
    """ Selects the language to be used when retrieving localized messages. """
    self.language = language

  def MakeSelectLanguage(self):
    """ Returns a function that can be used to select the current language. """
    return self.SelectLanguage

  def MakeGetText(self):
    """ Returns a function that can be used to retrieve a localized message. """
    return self.GetText
646
647
class GypTemplate(Template):
  """ string.Template variant whose placeholders start with '@'.

  '@' is used as the delimiter instead of '$' to avoid unintended expansion
  when the string is passed from GYP.
  """
  delimiter = '@'
652
653
def Localize(source, locales, options):
  """ Generates the localized output file(s) from the |source| template.

  Args:
    source: Path to the Jinja2 template. May be empty/None when nothing has to
        be rendered (options.print_only or options.locales_listfile).
    locales: Locale names. They are normalized with NormalizeLanguageCode(),
        de-duplicated and sorted before use.
    options: Parsed command line options as produced by DoMain().

  Returns:
    A string with the space-separated, double-quoted output file names when
    options.print_only is set; None otherwise.
  """
  # Set the list of languages to use, without duplicates.
  languages = sorted(set(NormalizeLanguageCode(locale) for locale in locales))
  context = { 'languages' : languages }

  # Load the localized messages.
  message_map = MessageMap(languages, options.locale_dir)

  # Add OFFICIAL_BUILD variable the same way build/util/version.py
  # does.
  if os.environ.get('CHROME_BUILD_TYPE') == '_official':
    context['official_build'] = '1'
  else:
    context['official_build'] = '0'

  # Add all variables defined in the command line.
  if options.define:
    for define in options.define:
      name, value = define.split('=', 1)
      context[name] = value

  # Read NAME=VALUE variables from file.
  if options.variables:
    for file_name in options.variables:
      ReadValuesFromFile(context, file_name)

  template = None

  if source:
    # Load jinja2 library.
    if options.jinja2:
      jinja2_path = os.path.normpath(options.jinja2)
    else:
      jinja2_path = os.path.normpath(
          os.path.join(os.path.abspath(__file__),
                       '../../../../third_party/jinja2'))
    # jinja2 is imported as a package, so its parent directory must be on
    # sys.path. Avoid adding the same entry again on repeated calls.
    jinja2_parent = os.path.split(jinja2_path)[0]
    if jinja2_parent not in sys.path:
      sys.path.append(jinja2_parent)
    from jinja2 import Environment, FileSystemLoader

    # Create jinja2 environment.
    (template_path, template_name) = os.path.split(source)
    env = Environment(loader=FileSystemLoader(template_path),
                      extensions=['jinja2.ext.do', 'jinja2.ext.i18n'])

    # Register custom filters.
    env.filters['GetCodepage'] = GetCodepage
    env.filters['GetCodepageDecimal'] = GetCodepageDecimal
    env.filters['GetLangId'] = GetLangId
    env.filters['GetPrimaryLanguage'] = GetPrimaryLanguage
    env.filters['GetSublanguage'] = GetSublanguage

    # Register the message map with jinja2.i18n extension.
    env.globals['IsRtlLanguage'] = IsRtlLanguage
    env.globals['SelectLanguage'] = message_map.MakeSelectLanguage()
    env.install_gettext_callables(message_map.MakeGetText(),
                                  message_map.MakeGetText())

    template = env.get_template(template_name)

  # Generate a separate file per each locale if requested.
  outputs = []
  if options.locale_output:
    target = GypTemplate(options.locale_output)
    for lang in languages:
      context['languages'] = [ lang ]
      context['language'] = lang
      context['pak_suffix'] = GetDataPackageSuffix(lang)
      context['json_suffix'] = GetJsonSuffix(lang)
      message_map.SelectLanguage(lang)

      template_file_name = target.safe_substitute(context)
      outputs.append(template_file_name)
      if not options.print_only and not options.locales_listfile:
        WriteIfChanged(template_file_name, template.render(context),
                       options.encoding)
  else:
    outputs.append(options.output)
    # DoMain() allows the template to be omitted when only a listfile is
    # requested; in that case there is nothing to render for the output.
    listfile_only = template is None and options.locales_listfile
    if not options.print_only and not listfile_only:
      WriteIfChanged(options.output, template.render(context), options.encoding)

  if options.print_only:
    # Quote each element so filename spaces don't mess up gyp's attempt to parse
    # it into a list.
    return " ".join(['"%s"' % x for x in outputs])

  if options.locales_listfile:
    # Strip off the quotes from each filename when writing into a listfile.
    content = u'\n'.join([x.strip('"') for x in outputs])
    WriteIfChanged(options.locales_listfile, content, options.encoding)

  return None
747
748
def DoMain(argv):
  """ Parses |argv|, validates the option combination and runs Localize().

  |argv| holds the command line arguments without the program name; the
  positional arguments are the locales. Invalid command lines are reported
  through OptionParser.error(). Returns whatever Localize() returns.
  """
  # (flags, keyword arguments) for every supported option, in help order.
  option_table = (
      (('-d', '--define'),
       dict(dest='define', action='append', type='string',
            help='define a variable (NAME=VALUE).')),
      (('--encoding',),
       dict(dest='encoding', type='string', default='utf-8',
            help="set the encoding of <output>. 'utf-8' is the default.")),
      (('--jinja2',),
       dict(dest='jinja2', type='string',
            help="specifies path to the jinja2 library.")),
      (('--locale_dir',),
       dict(dest='locale_dir', type='string',
            help="set path to localized message files.")),
      (('--locale_output',),
       dict(dest='locale_output', type='string',
            help='specify the per-locale output file name.')),
      (('-o', '--output'),
       dict(dest='output', type='string',
            help="specify the output file name.")),
      (('--print_only',),
       dict(dest='print_only', action='store_true', default=False,
            help='print the output file names only.')),
      (('--locales_listfile',),
       dict(dest='locales_listfile', type='string',
            help='print the output file names into the specified file.')),
      (('-t', '--template'),
       dict(dest='template', type='string',
            help="specify the template file name.")),
      (('--variables',),
       dict(dest='variables', action='append', type='string',
            help='read variables (NAME=VALUE) from file.')),
  )

  parser = OptionParser(usage="Usage: localize [options] locales")
  for flags, settings in option_table:
    parser.add_option(*flags, **settings)

  options, locales = parser.parse_args(argv)

  if not locales:
    parser.error('At least one locale must be specified')

  # Exactly one of --output / --locale_output has to be given.
  has_output = bool(options.output)
  has_locale_output = bool(options.locale_output)
  if has_output == has_locale_output:
    parser.error(
        'Either --output or --locale_output must be specified but not both')

  # The template is only optional when just the file names are wanted.
  names_only = options.print_only or options.locales_listfile
  if not options.template and not names_only:
    parser.error('The template name is required unless either --print_only '
                 'or --locales_listfile is used')

  return Localize(options.template, locales, options)
795
if __name__ == '__main__':
  # DoMain() returns a string (the output file names) for --print_only and
  # None otherwise. Passing a string to sys.exit() would print it to stderr
  # and exit with status 1, so print it to stdout and report success instead.
  result = DoMain(sys.argv[1:])
  if result:
    sys.stdout.write(result + '\n')
  sys.exit(0)
798