1#!/usr/bin/env python2
2#############################################################################
3##
4## Copyright (C) 2020 The Qt Company Ltd.
5## Contact: https://www.qt.io/licensing/
6##
7## This file is part of the test suite of the Qt Toolkit.
8##
9## $QT_BEGIN_LICENSE:GPL-EXCEPT$
10## Commercial License Usage
11## Licensees holding valid commercial Qt licenses may use this file in
12## accordance with the commercial license agreement provided with the
13## Software or, alternatively, in accordance with the terms contained in
14## a written agreement between you and The Qt Company. For licensing terms
15## and conditions see https://www.qt.io/terms-conditions. For further
16## information use the contact form at https://www.qt.io/contact-us.
17##
18## GNU General Public License Usage
19## Alternatively, this file may be used under the terms of the GNU
20## General Public License version 3 as published by the Free Software
21## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22## included in the packaging of this file. Please review the following
23## information to ensure the GNU General Public License requirements will
24## be met: https://www.gnu.org/licenses/gpl-3.0.html.
25##
26## $QT_END_LICENSE$
27##
28#############################################################################
29"""Script to generate C++ code from CLDR data in qLocaleXML form
30
31See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
32Pass the output file from that as first parameter to this script; pass
33the root of the qtbase check-out as second parameter.
34"""
35
36import os
37import datetime
38
39from qlocalexml import QLocaleXmlReader
40from xml.dom import minidom
41from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor
42
def compareLocaleKeys(key1, key2):
    """Three-way comparison of two locale keys, for use as a sort comparator.

    Each key is indexed as (language, script, country); the entries are
    subtracted from one another, so they need to be numbers (IDs).
    Returns a negative value, zero or a positive value according as key1
    is to sort before, level with or after key2.

    Before the first call, compareLocaleKeys.default_map must be set to a
    mapping {(language, script): country}, by ID.
    """
    if key1 == key2:
        return 0

    # Language is the primary sort criterion:
    language1, language2 = key1[0], key2[0]
    if language1 != language2:
        return language1 - language2

    default_of = compareLocaleKeys.default_map

    # If key1's (language, script) has a default country, whichever key
    # uses that country goes first (key1 is checked before key2):
    tag1 = key1[:2]
    if tag1 in default_of:
        preferred = default_of[tag1]
        if key1[2] == preferred:
            return -1
        if key2[2] == preferred:
            return 1

    # Same language and script: order by country.
    if key1[1] == key2[1]:
        return key1[2] - key2[2]

    # Scripts differ: repeat the default-country test using key2's
    # (language, script), this time checking key2 before key1:
    tag2 = key2[:2]
    if tag2 in default_of:
        preferred = default_of[tag2]
        if key2[2] == preferred:
            return 1
        if key1[2] == preferred:
            return -1

    # Nothing else separates them: order by script.
    return key1[1] - key2[1]
76
77
class StringDataToken:
    """An (index, length) pair, each of which must not exceed 0xFFFF.

    Instances are what StringData.append() hands back; their string form
    is the text "index,length" with one space of padding either side.
    """
    def __init__(self, index, length):
        """Record index and length; raise Error if either exceeds 0xFFFF."""
        if any(value > 0xFFFF for value in (index, length)):
            raise Error("Position exceeds ushort range: {},{}".format(index, length))
        self.index, self.length = index, length

    def __str__(self):
        template = " {},{} "
        return template.format(self.index, self.length)
86
class StringData:
    """A named pool of strings, written out as one C++ array of ushort.

    self.data holds the concatenated output of unicode2hex() for every
    distinct string added so far; self.hash maps each such string to the
    StringDataToken that locates it within self.data.
    """
    def __init__(self, name):
        """Start an empty pool; name is the C++ identifier of the array."""
        self.name = name
        self.data = []
        self.hash = {}

    def append(self, s):
        """Return the token locating s in the pool, adding s if it is new.

        Raises Error if the pool already holds more than 0xffff entries or
        if the converted form of s has 0xffff entries or more.
        """
        # A string seen before re-uses its earlier token:
        try:
            return self.hash[s]
        except KeyError:
            pass

        units = unicode2hex(s)
        start = len(self.data)
        if start > 0xffff:
            raise Error('Data index {} is too big for uint16!'.format(start))
        count = len(units)
        if count >= 0xffff:
            raise Error('Data is too big ({}) for uint16 size!'.format(count))

        try:
            token = StringDataToken(start, count)
        except Error as e:
            # Say which string was at fault before passing the error on:
            e.message += '(on data "{}")'.format(s)
            raise

        self.hash[s] = token
        self.data.extend(units)
        return token

    def write(self, fd):
        """Write the pool to fd as a static const ushort array definition."""
        opening = "\nstatic const ushort {}[] = {{\n".format(self.name)
        fd.write(opening)
        fd.write(wrap_list(self.data))
        fd.write("\n};\n")
118
def currencyIsoCodeData(s):
    """Return a brace-enclosed, comma-separated list of s's character codes.

    Any false s (empty string, None, 0) produces "{0,0,0}" instead.
    """
    if not s:
        return "{0,0,0}"
    codes = [str(ord(ch)) for ch in s]
    return '{' + ",".join(codes) + '}'
123
class LocaleSourceEditor (SourceFileEditor):
    """SourceFileEditor whose output starts with a "generated from CLDR" banner.

    After the base class has been initialised, a C comment recording
    today's date and the given CLDR version is written to self.writer.
    """
    __upinit = SourceFileEditor.__init__
    def __init__(self, path, temp, version):
        """Set up the editor and emit the banner.

        path and temp are passed straight through to SourceFileEditor;
        version is the CLDR version named in the banner.
        """
        self.__upinit(path, temp)
        banner = """
/*
    This part of the file was generated on {} from the
    Common Locale Data Repository v{}

    http://www.unicode.org/cldr/

    Do not edit this section: instead regenerate it using
    cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""".format(datetime.date.today(), version)
        self.writer.write(banner)
141
class LocaleDataWriter (LocaleSourceEditor):
    """Writes the generated C++ tables of locale data to self.writer.

    Each public method emits one table, or one group of related tables;
    main() calls them in the order the tables are to appear in the file.
    """
    def likelySubtags(self, likely):
        """Write the likely_subtags[] table of QLocaleId pairs.

        likely yields tuples (had, have, got, give, last). have and give
        are each a triple of integers, written as the two initializers of
        one row; had and got appear only in the row's trailing comment. A
        true last marks the final row, which gets a space in place of the
        separating comma.
        """
        self.writer.write('static const QLocaleId likely_subtags[] = {\n')
        for had, have, got, give, last in likely:
            self.writer.write('    {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
            self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
            self.writer.write(' ' if last else ',')
            self.writer.write(' // {} -> {}\n'.format(had, got))
        self.writer.write('};\n\n')

    def localeIndex(self, indices):
        """Write the locale_index[] table, ending with a trailing 0 entry.

        indices yields pairs (number, text): the number becomes the array
        entry, the text its trailing comment.
        """
        self.writer.write('static const quint16 locale_index[] = {\n')
        for pair in indices:
            self.writer.write('{:6d}, // {}\n'.format(*pair))
        self.writer.write('     0 // trailing 0\n')
        self.writer.write('};\n\n')

    def localeData(self, locales, names):
        """Write the locale_data[] table and the string tables it refers to.

        locales maps each key in names to an object carrying that locale's
        data as attributes; names gives the keys in the order their rows
        are to be written. Each key is indexed as (language, script,
        country). Textual fields are pooled in StringData tables, with
        each row holding the index,length token of its entry; the pools
        are written out after the main table. The main table ends with an
        all-zero row.

        May raise Error, from StringData.append(), if a pool grows too big.
        """
        list_pattern_part_data = StringData('list_pattern_part_data')
        date_format_data = StringData('date_format_data')
        time_format_data = StringData('time_format_data')
        days_data = StringData('days_data')
        am_data = StringData('am_data')
        pm_data = StringData('pm_data')
        byte_unit_data = StringData('byte_unit_data')
        currency_symbol_data = StringData('currency_symbol_data')
        currency_display_name_data = StringData('currency_display_name_data')
        currency_format_data = StringData('currency_format_data')
        endonyms_data = StringData('endonyms_data')

        # Locale data
        self.writer.write('static const QLocaleData locale_data[] = {\n')
        # Table headings: keep each label centred in its field, matching line_format:
        self.writer.write('   // '
                          # Width 6 + comma
                          ' lang  ' # IDs
                          'script '
                          '  terr '
                          '  dec  ' # Numeric punctuation
                          ' group '
                          ' list  ' # Delimiter for *numeric* lists
                          ' prcnt ' # Arithmetic symbols
                          '  zero '
                          ' minus '
                          ' plus  '
                          '  exp  '
                          # Width 8 + comma - to make space for these wide labels !
                          ' quotOpn ' # Quotation marks
                          ' quotEnd '
                          'altQtOpn '
                          'altQtEnd '
                          # Width 11 + comma
                          '  lpStart   ' # List pattern
                          '   lpMid    '
                          '   lpEnd    '
                          '   lpTwo    '
                          '   sDtFmt   ' # Date format
                          '   lDtFmt   '
                          '   sTmFmt   ' # Time format
                          '   lTmFmt   '
                          '   ssDays   ' # Days
                          '   slDays   '
                          '   snDays   '
                          '    sDays   '
                          '    lDays   '
                          '    nDays   '
                          '     am     ' # am/pm indicators
                          '     pm     '
                          # Width 8 + comma
                          '  byte   '
                          ' siQuant '
                          'iecQuant '
                          # Width 8+4 + comma
                          '   currISO   '
                          # Width 11 + comma
                          '  currSym   ' # Currency formatting
                          ' currDsply  '
                          '  currFmt   '
                          ' currFmtNeg '
                          '  endoLang  ' # Name of language in itself, and of country
                          '  endoCntry '
                          # Width 6 + comma
                          'curDgt ' # Currency number representation
                          'curRnd '
                          'dow1st ' # First day of week
                          ' wknd+ ' # Week-end start/end days
                          ' wknd-'
                          # No trailing space on last entry (be sure to
                          # pad before adding anything after it).
                          '\n')

        # One row of the table; the field widths match the headings above.
        formatLine = ''.join((
            '    {{ ',
            # Locale-identifier
            '{:6d},' * 3,
            # Numeric formats, list delimiter
            '{:6d},' * 8,
            # Quotation marks
            '{:8d},' * 4,
            # List patterns, date/time formats, month/day names, am/pm
            '{:>11s},' * 16,
            # SI/IEC byte-unit abbreviations
            '{:>8s},' * 3,
            # Currency ISO code
            ' {:>10s}, ',
            # Currency and endonyms
            '{:>11s},' * 6,
            # Currency formatting
            '{:6d},{:6d}',
            # Day of week and week-end
            ',{:6d}' * 3,
            ' }}')).format
        for key in names:
            locale = locales[key]
            # The order of the append() calls on any one pool fixes the
            # layout of that pool, so keep it stable.
            self.writer.write(formatLine(
                    key[0], key[1], key[2],
                    locale.decimal,
                    locale.group,
                    locale.listDelim,
                    locale.percent,
                    locale.zero,
                    locale.minus,
                    locale.plus,
                    locale.exp,
                    locale.quotationStart,
                    locale.quotationEnd,
                    locale.alternateQuotationStart,
                    locale.alternateQuotationEnd,
                    list_pattern_part_data.append(locale.listPatternPartStart),
                    list_pattern_part_data.append(locale.listPatternPartMiddle),
                    list_pattern_part_data.append(locale.listPatternPartEnd),
                    list_pattern_part_data.append(locale.listPatternPartTwo),
                    date_format_data.append(locale.shortDateFormat),
                    date_format_data.append(locale.longDateFormat),
                    time_format_data.append(locale.shortTimeFormat),
                    time_format_data.append(locale.longTimeFormat),
                    days_data.append(locale.standaloneShortDays),
                    days_data.append(locale.standaloneLongDays),
                    days_data.append(locale.standaloneNarrowDays),
                    days_data.append(locale.shortDays),
                    days_data.append(locale.longDays),
                    days_data.append(locale.narrowDays),
                    am_data.append(locale.am),
                    pm_data.append(locale.pm),
                    byte_unit_data.append(locale.byte_unit),
                    byte_unit_data.append(locale.byte_si_quantified),
                    byte_unit_data.append(locale.byte_iec_quantified),
                    currencyIsoCodeData(locale.currencyIsoCode),
                    currency_symbol_data.append(locale.currencySymbol),
                    currency_display_name_data.append(locale.currencyDisplayName),
                    currency_format_data.append(locale.currencyFormat),
                    currency_format_data.append(locale.currencyNegativeFormat),
                    endonyms_data.append(locale.languageEndonym),
                    endonyms_data.append(locale.countryEndonym),
                    locale.currencyDigits,
                    locale.currencyRounding, # unused (QTBUG-81343)
                    locale.firstDayOfWeek,
                    locale.weekendStart,
                    locale.weekendEnd)
                              + ', // {}/{}/{}\n'.format(
                    locale.language, locale.script, locale.country))
        self.writer.write(formatLine(*( # All zeros, matching the format:
                    (0,) * (3 + 8 + 4) + ('0,0',) * (16 + 3)
                    + (currencyIsoCodeData(0),)
                    + ('0,0',) * 6 + (0,) * (2 + 3) ))
                          + ' // trailing zeros\n')
        self.writer.write('};\n')

        # StringData tables:
        for data in (list_pattern_part_data, date_format_data,
                     time_format_data, days_data,
                     byte_unit_data, am_data, pm_data, currency_symbol_data,
                     currency_display_name_data, currency_format_data,
                     endonyms_data):
            data.write(self.writer)

    @staticmethod
    def __writeNameData(out, book, form):
        """Write the {form}_name_list[] string and {form}_name_index[] table.

        out is called with each piece of text to write; book maps each
        numeric ID to a sequence whose first entry is the name; form is
        'language', 'script' or 'country'. The entry with ID 0 is not
        taken from book: it is written as "Default" in the list and as
        offset 0 (commented Any<Form>) in the index.
        """
        # Both tables are positional, so the order of entries matters. A
        # plain dict does not promise any iteration order in Python 2, so
        # sort explicitly to visit the entries in ascending order of ID.
        # NOTE(review): the C++ consumers presumably index by ID - confirm.
        names = [value[0] for key, value in sorted(book.items()) if key != 0]

        out('static const char {}_name_list[] =\n'.format(form))
        out('"Default\\0"\n')
        for name in names:
            out('"' + name + '\\0"\n')
        out(';\n\n')

        out('static const quint16 {}_name_index[] = {{\n'.format(form))
        out('     0, // Any{}\n'.format(form.capitalize()))
        # Offsets into the name list: the first real name starts just
        # after "Default" and its terminating '\0'.
        index = len('Default') + 1
        for name in names:
            out('{:6d}, // {}\n'.format(index, name))
            index += len(name) + 1 # the name and its '\0'
        out('};\n\n')

    @staticmethod
    def __writeCodeList(out, book, form, width):
        """Write the {form}_code_list[] string of fixed-width codes.

        out, book and form are as for __writeNameData(); each value in
        book has the code as its second entry. A code shorter than width
        is padded with \\0 escapes up to that width; the name is added as
        a trailing comment.
        """
        out('static const unsigned char {}_code_list[] =\n'.format(form))
        # Sorted by ID, for the same reason as in __writeNameData().
        for key, value in sorted(book.items()):
            code = value[1]
            code += r'\0' * max(width - len(code), 0)
            out('"{}" // {}\n'.format(code, value[0]))
        out(';\n\n')

    def languageNames(self, languages):
        """Write the language name list and its index."""
        self.__writeNameData(self.writer.write, languages, 'language')

    def scriptNames(self, scripts):
        """Write the script name list and its index."""
        self.__writeNameData(self.writer.write, scripts, 'script')

    def countryNames(self, countries):
        """Write the country name list and its index."""
        self.__writeNameData(self.writer.write, countries, 'country')

    # TODO: unify these next three into the previous three; kept
    # separate for now to verify we're not changing data.

    def languageCodes(self, languages):
        """Write the list of language codes, each padded to width 3."""
        self.__writeCodeList(self.writer.write, languages, 'language', 3)

    def scriptCodes(self, scripts):
        """Write the list of script codes, each padded to width 4."""
        self.__writeCodeList(self.writer.write, scripts, 'script', 4)

    def countryCodes(self, countries): # TODO: unify with countryNames()
        """Write the list of country codes, each padded to width 3."""
        self.__writeCodeList(self.writer.write, countries, 'country', 3)
368
class CalendarDataWriter (LocaleSourceEditor):
    """Writes one calendar's table of month-name data to self.writer."""
    # Formats one row: three IDs, then six braced tokens into months_data.
    formatCalendar = ('      {{'
                      + '{:6d}'
                      + ',{:6d}' * 2
                      + ',{{{:>5s}}}' * 6
                      + '}}, ').format

    def write(self, calendar, locales, names):
        """Write the locale_data[] table for calendar, then months_data[].

        calendar selects, from each locale's month-name attributes, the
        entry to use; locales maps each key in names to its locale object
        and names gives the keys in the order their rows are written. Each
        key is indexed as (language, script, country). The table ends with
        an all-zero row.

        May raise Error, from StringData.append(), if the pool grows too big.
        """
        months_data = StringData('months_data')
        out = self.writer.write

        out('static const QCalendarLocale locale_data[] = {\n')
        out(''.join((
            '   // ',
            # IDs, width 7 (6 + comma)
            ' lang  ',
            ' script',
            ' terr  ',
            # Month-name start-end pairs, width 8 (5 plus '{},'):
            ' sShort ',
            ' sLong  ',
            ' sNarrow',
            ' short  ',
            ' long   ',
            ' narrow',
            # No trailing space on last; be sure
            # to pad before adding later entries.
            '\n')))

        for key in names:
            locale = locales[key]
            # Pool the six forms of month names, in the order of the columns:
            tokens = (
                months_data.append(locale.standaloneShortMonths[calendar]),
                months_data.append(locale.standaloneLongMonths[calendar]),
                months_data.append(locale.standaloneNarrowMonths[calendar]),
                months_data.append(locale.shortMonths[calendar]),
                months_data.append(locale.longMonths[calendar]),
                months_data.append(locale.narrowMonths[calendar]))
            row = self.formatCalendar(key[0], key[1], key[2], *tokens)
            label = '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country)
            out(row + label)

        zeros = self.formatCalendar(0, 0, 0, *(('0,0',) * 6))
        out(zeros + '// trailing zeros\n')
        out('};\n')
        months_data.write(self.writer)
411
class LocaleHeaderWriter (SourceFileEditor):
    """Writes the Language, Script and Country enums to self.writer.

    dupes, given to the constructor, is a collection of member names that
    would clash: a language or country name found in it gets the enum's
    name appended, while a script name found in it is reported as an Error.
    """
    __upinit = SourceFileEditor.__init__
    def __init__(self, path, temp, dupes):
        """Set up the editor (path and temp go to SourceFileEditor); save dupes."""
        self.__upinit(path, temp)
        self.__dupes = dupes

    def languages(self, languages):
        """Write the Language enum, followed by a blank line."""
        self.__enum('Language', languages, self.__language)
        self.writer.write('\n')

    def countries(self, countries):
        """Write the Country enum."""
        self.__enum('Country', countries, self.__country)

    def scripts(self, scripts):
        """Write the Script enum, followed by a blank line."""
        self.__enum('Script', scripts, self.__script)
        self.writer.write('\n')

    # Implementation details
    from enumdata import (language_aliases as __language,
                          country_aliases as __country,
                          script_aliases as __script)

    def __enum(self, name, book, alias):
        """Write the enum called name, with its aliases and Last<name> member.

        book maps each numeric ID to a sequence whose first entry is the
        display name, from which the member's name is derived by removing
        its spaces. alias maps extra member names to what they stand for.

        Raises Error if book is empty or a derived script name is in dupes.
        """
        # A plain assert would vanish under -O, leaving member unbound
        # below; raising Error also lets main() clean up and report.
        if not book:
            raise Error('No entries from which to generate the {} enum'.format(name))
        out, dupes = self.writer.write, self.__dupes
        out('    enum {} {{\n'.format(name))
        # A plain dict does not promise any iteration order in Python 2:
        # sort by ID so that members come out in ascending order and the
        # last one visited - used for Last<name> - has the highest ID.
        for key, value in sorted(book.items()):
            member = value[0]
            if name == 'Script':
                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
                member = ''.join(word[0].upper() + word[1:] for word in member.split())
                if not member.endswith('Script'):
                    member += 'Script'
                if member in dupes:
                    raise Error('The script name "{}" is messy'.format(member))
            else:
                member = ''.join(member.split())
                member = member + name if member in dupes else member
            out('        {} = {},\n'.format(member, key))

        # Aliases, sorted by name, then the Last<name> marker:
        out('\n        '
            + ',\n        '.join('{} = {}'.format(*pair)
                                 for pair in sorted(alias.items()))
            + ',\n\n        Last{} = {}\n    }};\n'.format(name, member))
456
def usage(name, err, message = ''):
    """Write a one-line usage summary for program name to err.

    A non-empty message is added after a blank line, with a newline after it.
    """
    # TODO: elaborate
    err.write("Usage: {} path/to/qlocale.xml root/of/qtbase\n".format(name))
    if not message:
        return
    err.write('\n' + message + '\n')
462
def main(args, out, err):
    """Regenerate the locale-related sources of qtbase from a qLocaleXML file.

    args is the full argument list: program name, path to the qLocaleXML
    file, root of the qtbase source tree. Entries are popped off it as
    they are read. Messages go to err; out is not used. Returns 0 on
    success, 1 on a usage error or on any failure that gets reported.
    """
    # TODO: Make calendars a command-line parameter
    # map { CLDR name: Qt file name }
    calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew',

    name = args.pop(0)
    if len(args) != 2:
        usage(name, err, 'I expect two arguments')
        return 1

    qlocalexml, qtsrcdir = args.pop(0), args.pop(0)
    text_dir = os.path.join(qtsrcdir, 'src', 'corelib', 'text')
    time_dir = os.path.join(qtsrcdir, 'src', 'corelib', 'time')

    # Check the files to be rewritten are where they are expected to be:
    required = ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc')
    if (not os.path.isdir(qtsrcdir)
        or not all(os.path.isfile(os.path.join(text_dir, leaf)) for leaf in required)):
        usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir)
        return 1

    reader = QLocaleXmlReader(qlocalexml)
    locale_map = dict(reader.loadLocaleMap(calendars, err.write))

    # Order the locale keys; the comparison needs its default_map set first.
    locale_keys = locale_map.keys()
    compareLocaleKeys.default_map = dict(reader.defaultMap())
    locale_keys.sort(compareLocaleKeys)

    # qlocale_data_p.h
    try:
        data_editor = LocaleDataWriter(os.path.join(text_dir, 'qlocale_data_p.h'),
                                       qtsrcdir, reader.cldrVersion)
    except IOError as e:
        err.write('Failed to open files to transcribe locale data: ' + (e.message or e.args[1]))
        return 1

    try:
        data_editor.likelySubtags(reader.likelyMap())
        data_editor.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
        data_editor.localeData(locale_map, locale_keys)
        data_editor.writer.write('\n')
        data_editor.languageNames(reader.languages)
        data_editor.scriptNames(reader.scripts)
        data_editor.countryNames(reader.countries)
        # TODO: merge the next three into the previous three
        data_editor.languageCodes(reader.languages)
        data_editor.scriptCodes(reader.scripts)
        data_editor.countryCodes(reader.countries)
    except Error as e:
        data_editor.cleanup()
        err.write('\nError updating locale data: ' + e.message + '\n')
        return 1
    else:
        data_editor.close()

    # Generate calendar data
    for calendar, stem in calendars.items():
        target = os.path.join(time_dir, 'q{}calendar_data_p.h'.format(stem))
        try:
            calendar_editor = CalendarDataWriter(target, qtsrcdir, reader.cldrVersion)
        except IOError as e:
            err.write('Failed to open files to transcribe ' + calendar
                             + ' data ' + (e.message or e.args[1]))
            return 1

        try:
            calendar_editor.write(calendar, locale_map, locale_keys)
        except Error as e:
            calendar_editor.cleanup()
            err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n')
            return 1
        else:
            calendar_editor.close()

    # qlocale.h
    try:
        header_editor = LocaleHeaderWriter(os.path.join(text_dir, 'qlocale.h'),
                                           qtsrcdir, reader.dupes)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.h: ' + (e.message or e.args[1]))
        return 1

    try:
        header_editor.languages(reader.languages)
        header_editor.scripts(reader.scripts)
        header_editor.countries(reader.countries)
    except Error as e:
        header_editor.cleanup()
        err.write('\nError updating qlocale.h: ' + e.message + '\n')
        return 1
    else:
        header_editor.close()

    # qlocale.qdoc
    try:
        doc_editor = Transcriber(os.path.join(text_dir, 'qlocale.qdoc'), qtsrcdir)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.qdoc: ' + (e.message or e.args[1]))
        return 1

    # Copy the file through, replacing the line that names the CLDR version:
    DOCSTRING = "    QLocale's data is based on Common Locale Data Repository "
    try:
        for line in doc_editor.reader:
            text = (DOCSTRING + 'v' + reader.cldrVersion + '.\n'
                    if DOCSTRING in line else line)
            doc_editor.writer.write(text)
    except Error as e:
        doc_editor.cleanup()
        err.write('\nError updating qlocale.qdoc: ' + e.message + '\n')
        return 1

    doc_editor.close()
    return 0
577
if __name__ == "__main__":
    # Run as a script: main()'s return value becomes the exit status.
    import sys
    status = main(sys.argv, sys.stdout, sys.stderr)
    sys.exit(status)
581