#!/usr/bin/env python2
#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################
"""Script to generate C++ code from CLDR data in qLocaleXML form

See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
Pass the output file from that as first parameter to this script; pass
the root of the qtbase check-out as second parameter.
"""

import os
import datetime
from functools import cmp_to_key

from qlocalexml import QLocaleXmlReader
from xml.dom import minidom
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor

def compareLocaleKeys(key1, key2):
    """cmp()-style comparison of two locale keys.

    Each key is a triple of numeric IDs, used here as (language,
    script, country).  The attribute compareLocaleKeys.default_map
    must be set (see main()) before this is called; it maps a
    (language, script) pair to a country ID.

    Returns zero for equal keys, else a negative value if key1 sorts
    before key2 and a positive value if it sorts after.
    """
    if key1 == key2:
        return 0

    if key1[0] != key2[0]: # First sort by language:
        return key1[0] - key2[0]

    defaults = compareLocaleKeys.default_map
    # maps {(language, script): country} by ID
    try:
        country = defaults[key1[:2]]
    except KeyError:
        pass
    else:
        # Whichever key has the default country for key1's
        # (language, script) goes first:
        if key1[2] == country:
            return -1
        if key2[2] == country:
            return 1

    if key1[1] == key2[1]: # Same script: order by country
        return key1[2] - key2[2]

    try:
        country = defaults[key2[:2]]
    except KeyError:
        pass
    else:
        # Same check again, this time against the default country for
        # key2's (language, script):
        if key2[2] == country:
            return 1
        if key1[2] == country:
            return -1

    return key1[1] - key2[1] # Finally, order by script


class StringDataToken:
    """Position of one string within a StringData table.

    Holds the index at which the string starts and its length; raises
    Error if either exceeds 0xFFFF.  Its str() is the text
    ' index,length ', as used in the generated table rows.
    """
    def __init__(self, index, length):
        if index > 0xFFFF or length > 0xFFFF:
            raise Error("Position exceeds ushort range: {},{}".format(index, length))
        self.index = index
        self.length = length
    def __str__(self):
        return " {},{} ".format(self.index, self.length)

class StringData:
    """Accumulates strings as one C array of ushort, sharing duplicates.

    Each distinct string is stored once; append() returns the
    StringDataToken saying where it lives in the array.
    """
    def __init__(self, name):
        self.data = [] # entries of the C array, in order
        self.hash = {} # maps each string seen to its token
        self.name = name # name of the C array to emit

    def append(self, s):
        """Add s to the table (if not already present); return its token.

        Raises Error if the start index or the size of the converted
        data does not fit the 16-bit fields used for them.
        """
        if s in self.hash:
            return self.hash[s]

        lst = unicode2hex(s)
        index = len(self.data)
        if index > 0xffff:
            raise Error('Data index {} is too big for uint16!'.format(index))
        size = len(lst)
        if size >= 0xffff:
            raise Error('Data is too big ({}) for uint16 size!'.format(size))
        try:
            token = StringDataToken(index, size)
        except Error as e:
            # Say which string provoked the problem, then pass it on:
            e.message += '(on data "{}")'.format(s)
            raise
        self.hash[s] = token
        self.data += lst
        return token

    def write(self, fd):
        """Write the accumulated data to fd as a static const ushort array."""
        fd.write("\nstatic const ushort {}[] = {{\n".format(self.name))
        fd.write(wrap_list(self.data))
        fd.write("\n};\n")

def currencyIsoCodeData(s):
    """C initializer giving the character codes of s.

    Returns '{0,0,0}' when s is empty (or otherwise false), as is done
    for the all-zeros trailing table row.
    """
    if s:
        return '{' + ",".join(str(ord(x)) for x in s) + '}'
    return "{0,0,0}"

class LocaleSourceEditor (SourceFileEditor):
    """SourceFileEditor that starts its output with a generated-code notice.

    The notice records today's date and the CLDR version passed to the
    constructor.
    """
    __upinit = SourceFileEditor.__init__
    def __init__(self, path, temp, version):
        """Set up as for SourceFileEditor(path, temp), then write the notice.

        version is the CLDR version to mention in the notice.
        """
        self.__upinit(path, temp)
        self.writer.write("""
/*
 This part of the file was generated on {} from the
 Common Locale Data Repository v{}

 http://www.unicode.org/cldr/

 Do not edit this section: instead regenerate it using
 cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
 edited) CLDR data; see qtbase/util/locale_database/.
*/

""".format(datetime.date.today(), version))

class LocaleDataWriter (LocaleSourceEditor):
    """Writes the locale data tables (C++ arrays) via self.writer."""

    def likelySubtags(self, likely):
        """Write the likely_subtags[] table.

        likely yields tuples (had, have, got, give, last): have and
        give are ID triples written as table entries, had and got
        appear in the end-of-line comment, and last is true for the
        final row (which gets no trailing comma).
        """
        self.writer.write('static const QLocaleId likely_subtags[] = {\n')
        for had, have, got, give, last in likely:
            self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
            self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
            self.writer.write(' ' if last else ',')
            self.writer.write(' // {} -> {}\n'.format(had, got))
        self.writer.write('};\n\n')

    def localeIndex(self, indices):
        """Write the locale_index[] table.

        indices yields pairs: a number for the table and text for the
        comment beside it.  A trailing 0 entry ends the table.
        """
        self.writer.write('static const quint16 locale_index[] = {\n')
        for pair in indices:
            self.writer.write('{:6d}, // {}\n'.format(*pair))
        self.writer.write(' 0 // trailing 0\n')
        self.writer.write('};\n\n')

    def localeData(self, locales, names):
        """Write the locale_data[] table and the string tables it refers to.

        locales maps each key (a triple of IDs) to its locale object;
        names is the sequence of keys, in the order in which their
        rows are to be written.  Raises Error if any string table
        outgrows its 16-bit indexing.
        """
        list_pattern_part_data = StringData('list_pattern_part_data')
        date_format_data = StringData('date_format_data')
        time_format_data = StringData('time_format_data')
        days_data = StringData('days_data')
        am_data = StringData('am_data')
        pm_data = StringData('pm_data')
        byte_unit_data = StringData('byte_unit_data')
        currency_symbol_data = StringData('currency_symbol_data')
        currency_display_name_data = StringData('currency_display_name_data')
        currency_format_data = StringData('currency_format_data')
        endonyms_data = StringData('endonyms_data')

        # Locale data
        self.writer.write('static const QLocaleData locale_data[] = {\n')
        # Table headings: keep each label centred in its field, matching line_format:
        # NOTE(review): as seen here, several labels are narrower than
        # the widths stated below; confirm the padding against the
        # generated header.
        self.writer.write(' // '
                          # Width 6 + comma
                          ' lang ' # IDs
                          'script '
                          ' terr '
                          ' dec ' # Numeric punctuation
                          ' group '
                          ' list ' # Delimiter for *numeric* lists
                          ' prcnt ' # Arithmetic symbols
                          ' zero '
                          ' minus '
                          ' plus '
                          ' exp '
                          # Width 8 + comma - to make space for these wide labels !
                          ' quotOpn ' # Quotation marks
                          ' quotEnd '
                          'altQtOpn '
                          'altQtEnd '
                          # Width 11 + comma
                          ' lpStart ' # List pattern
                          ' lpMid '
                          ' lpEnd '
                          ' lpTwo '
                          ' sDtFmt ' # Date format
                          ' lDtFmt '
                          ' sTmFmt ' # Time format
                          ' lTmFmt '
                          ' ssDays ' # Days
                          ' slDays '
                          ' snDays '
                          ' sDays '
                          ' lDays '
                          ' nDays '
                          ' am ' # am/pm indicators
                          ' pm '
                          # Width 8 + comma
                          ' byte '
                          ' siQuant '
                          'iecQuant '
                          # Width 8+4 + comma
                          ' currISO '
                          # Width 11 + comma
                          ' currSym ' # Currency formatting
                          ' currDsply '
                          ' currFmt '
                          ' currFmtNeg '
                          ' endoLang ' # Name of language in itself, and of country
                          ' endoCntry '
                          # Width 6 + comma
                          'curDgt ' # Currency number representation
                          'curRnd '
                          'dow1st ' # First day of week
                          ' wknd+ ' # Week-end start/end days
                          ' wknd-'
                          # No trailing space on last entry (be sure to
                          # pad before adding anything after it).
                          '\n')

        formatLine = ''.join((
            ' {{ ',
            # Locale-identifier
            '{:6d},' * 3,
            # Numeric formats, list delimiter
            '{:6d},' * 8,
            # Quotation marks
            '{:8d},' * 4,
            # List patterns, date/time formats, month/day names, am/pm
            '{:>11s},' * 16,
            # SI/IEC byte-unit abbreviations
            '{:>8s},' * 3,
            # Currency ISO code
            ' {:>10s}, ',
            # Currency and endonyms
            '{:>11s},' * 6,
            # Currency formatting
            '{:6d},{:6d}',
            # Day of week and week-end
            ',{:6d}' * 3,
            ' }}')).format
        for key in names:
            locale = locales[key]
            # Each .append() below stores the string in the relevant
            # table and supplies the token that locates it there:
            self.writer.write(formatLine(
                    key[0], key[1], key[2],
                    locale.decimal,
                    locale.group,
                    locale.listDelim,
                    locale.percent,
                    locale.zero,
                    locale.minus,
                    locale.plus,
                    locale.exp,
                    locale.quotationStart,
                    locale.quotationEnd,
                    locale.alternateQuotationStart,
                    locale.alternateQuotationEnd,
                    list_pattern_part_data.append(locale.listPatternPartStart),
                    list_pattern_part_data.append(locale.listPatternPartMiddle),
                    list_pattern_part_data.append(locale.listPatternPartEnd),
                    list_pattern_part_data.append(locale.listPatternPartTwo),
                    date_format_data.append(locale.shortDateFormat),
                    date_format_data.append(locale.longDateFormat),
                    time_format_data.append(locale.shortTimeFormat),
                    time_format_data.append(locale.longTimeFormat),
                    days_data.append(locale.standaloneShortDays),
                    days_data.append(locale.standaloneLongDays),
                    days_data.append(locale.standaloneNarrowDays),
                    days_data.append(locale.shortDays),
                    days_data.append(locale.longDays),
                    days_data.append(locale.narrowDays),
                    am_data.append(locale.am),
                    pm_data.append(locale.pm),
                    byte_unit_data.append(locale.byte_unit),
                    byte_unit_data.append(locale.byte_si_quantified),
                    byte_unit_data.append(locale.byte_iec_quantified),
                    currencyIsoCodeData(locale.currencyIsoCode),
                    currency_symbol_data.append(locale.currencySymbol),
                    currency_display_name_data.append(locale.currencyDisplayName),
                    currency_format_data.append(locale.currencyFormat),
                    currency_format_data.append(locale.currencyNegativeFormat),
                    endonyms_data.append(locale.languageEndonym),
                    endonyms_data.append(locale.countryEndonym),
                    locale.currencyDigits,
                    locale.currencyRounding, # unused (QTBUG-81343)
                    locale.firstDayOfWeek,
                    locale.weekendStart,
                    locale.weekendEnd)
                              + ', // {}/{}/{}\n'.format(
                    locale.language, locale.script, locale.country))
        self.writer.write(formatLine(*( # All zeros, matching the format:
                    (0,) * (3 + 8 + 4) + ('0,0',) * (16 + 3)
                    + (currencyIsoCodeData(0),)
                    + ('0,0',) * 6 + (0,) * (2 + 3) ))
                          + ' // trailing zeros\n')
        self.writer.write('};\n')

        # StringData tables:
        for data in (list_pattern_part_data, date_format_data,
                     time_format_data, days_data,
                     byte_unit_data, am_data, pm_data, currency_symbol_data,
                     currency_display_name_data, currency_format_data,
                     endonyms_data):
            data.write(self.writer)

    @staticmethod
    def __writeNameData(out, book, form):
        """Write the <form>_name_list[] and <form>_name_index[] tables.

        out is called with each piece of text to write.  book maps IDs
        to sequences whose first entry is the name; the entry with ID
        0 is skipped, "Default" being written in its place.  The index
        table gives the offset of each name within the name list.
        """
        out('static const char {}_name_list[] =\n'.format(form))
        out('"Default\\0"\n')
        for key, value in book.items():
            if key == 0:
                continue
            out('"' + value[0] + '\\0"\n')
        out(';\n\n')

        out('static const quint16 {}_name_index[] = {{\n'.format(form))
        out(' 0, // Any{}\n'.format(form.capitalize()))
        index = 8 # Length of "Default", plus its terminating '\0'
        for key, value in book.items():
            if key == 0:
                continue
            name = value[0]
            out('{:6d}, // {}\n'.format(index, name))
            index += len(name) + 1 # The + 1 is for the '\0' after each name
        out('};\n\n')

    @staticmethod
    def __writeCodeList(out, book, form, width):
        """Write the <form>_code_list[] table.

        out is called with each piece of text to write.  The values of
        book are sequences with a name first and a code second; each
        code shorter than width is padded to it with \\0 escapes, and
        the name is given in a comment after it.
        """
        out('static const unsigned char {}_code_list[] =\n'.format(form))
        for key, value in book.items():
            code = value[1]
            code += r'\0' * max(width - len(code), 0)
            out('"{}" // {}\n'.format(code, value[0]))
        out(';\n\n')

    def languageNames(self, languages):
        """Write the language name list and index tables."""
        self.__writeNameData(self.writer.write, languages, 'language')

    def scriptNames(self, scripts):
        """Write the script name list and index tables."""
        self.__writeNameData(self.writer.write, scripts, 'script')

    def countryNames(self, countries):
        """Write the country name list and index tables."""
        self.__writeNameData(self.writer.write, countries, 'country')

    # TODO: unify these next three into the previous three; kept
    # separate for now to verify we're not changing data.

    def languageCodes(self, languages):
        """Write the table of language codes, each padded to width 3."""
        self.__writeCodeList(self.writer.write, languages, 'language', 3)

    def scriptCodes(self, scripts):
        """Write the table of script codes, each padded to width 4."""
        self.__writeCodeList(self.writer.write, scripts, 'script', 4)

    def countryCodes(self, countries): # TODO: unify with countryNames()
        """Write the table of country codes, each padded to width 3."""
        self.__writeCodeList(self.writer.write, countries, 'country', 3)

class CalendarDataWriter (LocaleSourceEditor):
    """Writes one calendar's month-name data tables via self.writer."""
    # Formats one table row: three IDs then six {index,length} pairs.
    formatCalendar = ''.join((
        ' {{',
        '{:6d}',
        ',{:6d}' * 2,
        ',{{{:>5s}}}' * 6,
        '}}, ')).format
    def write(self, calendar, locales, names):
        """Write the locale_data[] table of month names, and months_data[].

        calendar is the key under which each locale's month-name
        attributes hold this calendar's names.  locales maps each key
        (a triple of IDs) to its locale object; names is the sequence
        of keys, in the order in which to write their rows.  Raises
        Error if the month-name data outgrows its 16-bit indexing.
        """
        months_data = StringData('months_data')

        self.writer.write('static const QCalendarLocale locale_data[] = {\n')
        self.writer.write(' // '
                          # IDs, width 7 (6 + comma)
                          + ' lang '
                          + ' script'
                          + ' terr '
                          # Month-name start-end pairs, width 8 (5 plus '{},'):
                          + ' sShort '
                          + ' sLong '
                          + ' sNarrow'
                          + ' short '
                          + ' long '
                          + ' narrow'
                          # No trailing space on last; be sure
                          # to pad before adding later entries.
                          + '\n')
        for key in names:
            locale = locales[key]
            self.writer.write(
                self.formatCalendar(
                    key[0], key[1], key[2],
                    months_data.append(locale.standaloneShortMonths[calendar]),
                    months_data.append(locale.standaloneLongMonths[calendar]),
                    months_data.append(locale.standaloneNarrowMonths[calendar]),
                    months_data.append(locale.shortMonths[calendar]),
                    months_data.append(locale.longMonths[calendar]),
                    months_data.append(locale.narrowMonths[calendar]))
                + '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country))
        self.writer.write(self.formatCalendar(*( (0,) * 3 + ('0,0',) * 6 ))
                          + '// trailing zeros\n')
        self.writer.write('};\n')
        months_data.write(self.writer)

class LocaleHeaderWriter (SourceFileEditor):
    """Writes the Language, Script and Country enums via self.writer."""
    __upinit = SourceFileEditor.__init__
    def __init__(self, path, temp, dupes):
        """Set up as for SourceFileEditor(path, temp).

        dupes is a collection of names that need disambiguating when
        used as enum members; see __enum().
        """
        self.__upinit(path, temp)
        self.__dupes = dupes

    def languages(self, languages):
        """Write the Language enum, followed by a blank line."""
        self.__enum('Language', languages, self.__language)
        self.writer.write('\n')

    def countries(self, countries):
        """Write the Country enum."""
        self.__enum('Country', countries, self.__country)

    def scripts(self, scripts):
        """Write the Script enum, followed by a blank line."""
        self.__enum('Script', scripts, self.__script)
        self.writer.write('\n')

    # Implementation details
    # Importing within the class body makes these name-mangled
    # (private) class attributes:
    from enumdata import (language_aliases as __language,
                          country_aliases as __country,
                          script_aliases as __script)

    def __enum(self, name, book, alias):
        """Write the enum called name.

        book maps each numeric value to a sequence whose first entry
        is the name from which to derive the member for that value;
        alias maps further member names to what they are equal to.
        Spaces are removed from each name.  For scripts, each word is
        also given an upper-case initial and 'Script' is appended,
        unless already there; for the rest, name is appended to any
        member found in the dupes passed to the constructor.  A final
        Last<name> member is set equal to the last member written from
        book.

        Raises Error if book is empty or if a script's member name is
        in dupes.
        """
        if not book:
            # Not an assert: must also be caught when run with -O, as
            # the Last entry below needs at least one member.
            raise Error('No data from which to generate enum {}'.format(name))
        out, dupes = self.writer.write, self.__dupes
        out(' enum {} {{\n'.format(name))
        for key, value in book.items():
            member = value[0]
            if name == 'Script':
                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
                member = ''.join(word[0].upper() + word[1:] for word in member.split())
                if not member.endswith('Script'):
                    member += 'Script'
                if member in dupes:
                    raise Error('The script name "{}" is messy'.format(member))
            else:
                member = ''.join(member.split())
                member = member + name if member in dupes else member
            out(' {} = {},\n'.format(member, key))

        out('\n '
            + ',\n '.join('{} = {}'.format(*pair)
                          for pair in sorted(alias.items()))
            + ',\n\n Last{} = {}\n }};\n'.format(name, member))

def usage(name, err, message = ''):
    """Write a usage message to err.

    name is the name by which the script was invoked; message, if
    given, is written after the usage line.
    """
    err.write("""Usage: {} path/to/qlocale.xml root/of/qtbase
""".format(name)) # TODO: elaborate
    if message:
        err.write('\n' + message + '\n')

def _ioErrorText(e):
    """Text describing IOError e, for use in error messages.

    Uses the exception's message, if it has one, else its strerror,
    falling back on str(e) so that reporting an error never itself
    raises.
    """
    return getattr(e, 'message', '') or e.strerror or str(e)

def main(args, out, err):
    """Regenerate the locale data sources from a qLocaleXML file.

    args is the command-line, starting with the script's name, which
    must be followed by the path to the qLocaleXML file and the root
    of the qtbase source tree.  Errors are reported to err; out is
    currently unused.  The args list is left unmodified.

    Returns 0 on success, 1 on failure.
    """
    # TODO: Make calendars a command-line parameter
    # map { CLDR name: Qt file name }
    calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew',

    # Slice, rather than pop, so as not to modify the caller's list
    # (typically sys.argv):
    name, params = args[0], args[1:]
    if len(params) != 2:
        usage(name, err, 'I expect two arguments')
        return 1

    qlocalexml, qtsrcdir = params

    if not (os.path.isdir(qtsrcdir)
            and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'text', leaf))
                    for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
        usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir)
        return 1

    reader = QLocaleXmlReader(qlocalexml)
    locale_map = dict(reader.loadLocaleMap(calendars, err.write))

    # The comparison consults default_map, so set it before sorting:
    compareLocaleKeys.default_map = dict(reader.defaultMap())
    locale_keys = sorted(locale_map, key=cmp_to_key(compareLocaleKeys))

    try:
        writer = LocaleDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text',
                                               'qlocale_data_p.h'),
                                  qtsrcdir, reader.cldrVersion)
    except IOError as e:
        err.write('Failed to open files to transcribe locale data: '
                  + _ioErrorText(e) + '\n')
        return 1

    try:
        writer.likelySubtags(reader.likelyMap())
        writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
        writer.localeData(locale_map, locale_keys)
        writer.writer.write('\n')
        writer.languageNames(reader.languages)
        writer.scriptNames(reader.scripts)
        writer.countryNames(reader.countries)
        # TODO: merge the next three into the previous three
        writer.languageCodes(reader.languages)
        writer.scriptCodes(reader.scripts)
        writer.countryCodes(reader.countries)
    except Error as e:
        writer.cleanup()
        err.write('\nError updating locale data: ' + e.message + '\n')
        return 1

    writer.close()

    # Generate calendar data
    for calendar, stem in calendars.items():
        try:
            writer = CalendarDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'time',
                                                     'q{}calendar_data_p.h'.format(stem)),
                                        qtsrcdir, reader.cldrVersion)
        except IOError as e:
            err.write('Failed to open files to transcribe ' + calendar
                      + ' data ' + _ioErrorText(e) + '\n')
            return 1

        try:
            writer.write(calendar, locale_map, locale_keys)
        except Error as e:
            writer.cleanup()
            err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n')
            return 1

        writer.close()

    # qlocale.h
    try:
        writer = LocaleHeaderWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.h'),
                                    qtsrcdir, reader.dupes)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.h: '
                  + _ioErrorText(e) + '\n')
        return 1

    try:
        writer.languages(reader.languages)
        writer.scripts(reader.scripts)
        writer.countries(reader.countries)
    except Error as e:
        writer.cleanup()
        err.write('\nError updating qlocale.h: ' + e.message + '\n')
        return 1

    writer.close()

    # qlocale.qdoc
    try:
        writer = Transcriber(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.qdoc'),
                             qtsrcdir)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.qdoc: '
                  + _ioErrorText(e) + '\n')
        return 1

    # Copy the file, replacing the line that states the CLDR version:
    DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
    try:
        for line in writer.reader:
            if DOCSTRING in line:
                writer.writer.write(DOCSTRING + 'v' + reader.cldrVersion + '.\n')
            else:
                writer.writer.write(line)
    except Error as e:
        writer.cleanup()
        err.write('\nError updating qlocale.qdoc: ' + e.message + '\n')
        return 1

    writer.close()
    return 0

if __name__ == "__main__":
    import sys
    sys.exit(main(sys.argv, sys.stdout, sys.stderr))