1#!/usr/bin/env python
2#
3# Any copyright is dedicated to the Public Domain.
4# http://creativecommons.org/publicdomain/zero/1.0/
5#
# Generate the source and header lists in sources.mozbuild files from
# ICU's sources.txt files, and also build a standalone copy of ICU using
# its build system to generate a new copy of the in-tree ICU data file.
9#
10# This script expects to be run from `update-icu.sh` after the in-tree
11# copy of ICU has been updated.
12
13from __future__ import absolute_import
14from __future__ import print_function
15
16import glob
17import multiprocessing
18import os
19import shutil
20import subprocess
21import sys
22import tempfile
23
24from mozpack import path as mozpath
25
# The following files have been determined to be dead/unused by a
# semi-automated analysis. If you need any of them, simply remove its
# entry from the list below. However, files grouped under a "Cluster"
# comment can only be removed from the list together, as they have
# (directed) dependencies on one another.
# If you want to rerun this analysis, contact :decoder.
# Consumed by write_sources(): entries listed here are emitted under
# `other_sources` instead of `sources` in the generated mozbuild files.
UNUSED_SOURCES = {
    "intl/icu/source/common/bytestrieiterator.cpp",
    "intl/icu/source/common/cstr.cpp",
    "intl/icu/source/common/cwchar.cpp",
    "intl/icu/source/common/icudataver.cpp",
    "intl/icu/source/common/icuplug.cpp",
    "intl/icu/source/common/pluralmap.cpp",
    "intl/icu/source/common/ucat.cpp",
    "intl/icu/source/common/ucnv2022.cpp",
    "intl/icu/source/common/ucnv_ct.cpp",
    "intl/icu/source/common/ucnvdisp.cpp",
    "intl/icu/source/common/ucnv_ext.cpp",
    "intl/icu/source/common/ucnvhz.cpp",
    "intl/icu/source/common/ucnvisci.cpp",
    "intl/icu/source/common/ucnv_lmb.cpp",
    "intl/icu/source/common/ucnvmbcs.cpp",
    "intl/icu/source/common/uidna.cpp",
    "intl/icu/source/common/unorm.cpp",
    "intl/icu/source/common/usc_impl.cpp",
    "intl/icu/source/common/ustr_wcs.cpp",
    "intl/icu/source/common/util_props.cpp",
    "intl/icu/source/i18n/anytrans.cpp",
    "intl/icu/source/i18n/brktrans.cpp",
    "intl/icu/source/i18n/casetrn.cpp",
    "intl/icu/source/i18n/cpdtrans.cpp",
    "intl/icu/source/i18n/esctrn.cpp",
    "intl/icu/source/i18n/fmtable_cnv.cpp",
    "intl/icu/source/i18n/funcrepl.cpp",
    "intl/icu/source/i18n/gender.cpp",
    "intl/icu/source/i18n/name2uni.cpp",
    "intl/icu/source/i18n/nortrans.cpp",
    "intl/icu/source/i18n/nultrans.cpp",
    "intl/icu/source/i18n/quant.cpp",
    "intl/icu/source/i18n/rbt.cpp",
    "intl/icu/source/i18n/rbt_data.cpp",
    "intl/icu/source/i18n/rbt_pars.cpp",
    "intl/icu/source/i18n/rbt_rule.cpp",
    "intl/icu/source/i18n/rbt_set.cpp",
    "intl/icu/source/i18n/regexcmp.cpp",
    "intl/icu/source/i18n/regeximp.cpp",
    "intl/icu/source/i18n/regexst.cpp",
    "intl/icu/source/i18n/regextxt.cpp",
    "intl/icu/source/i18n/rematch.cpp",
    "intl/icu/source/i18n/remtrans.cpp",
    "intl/icu/source/i18n/repattrn.cpp",
    "intl/icu/source/i18n/scientificnumberformatter.cpp",
    "intl/icu/source/i18n/strmatch.cpp",
    "intl/icu/source/i18n/strrepl.cpp",
    "intl/icu/source/i18n/titletrn.cpp",
    "intl/icu/source/i18n/tolowtrn.cpp",
    "intl/icu/source/i18n/toupptrn.cpp",
    "intl/icu/source/i18n/translit.cpp",
    "intl/icu/source/i18n/transreg.cpp",
    "intl/icu/source/i18n/tridpars.cpp",
    "intl/icu/source/i18n/unesctrn.cpp",
    "intl/icu/source/i18n/uni2name.cpp",
    "intl/icu/source/i18n/uregexc.cpp",
    "intl/icu/source/i18n/uregex.cpp",
    "intl/icu/source/i18n/uregion.cpp",
    "intl/icu/source/i18n/uspoof_build.cpp",
    "intl/icu/source/i18n/uspoof_conf.cpp",
    "intl/icu/source/i18n/utrans.cpp",
    "intl/icu/source/i18n/vzone.cpp",
    "intl/icu/source/i18n/zrule.cpp",
    "intl/icu/source/i18n/ztrans.cpp",
    # Cluster
    "intl/icu/source/common/resbund_cnv.cpp",
    "intl/icu/source/common/ures_cnv.cpp",
    # Cluster
    "intl/icu/source/common/propsvec.cpp",
    "intl/icu/source/common/ucnvsel.cpp",
    "intl/icu/source/common/ucnv_set.cpp",
    # Cluster
    "intl/icu/source/common/ubiditransform.cpp",
    "intl/icu/source/common/ushape.cpp",
    # Cluster
    "intl/icu/source/i18n/csdetect.cpp",
    "intl/icu/source/i18n/csmatch.cpp",
    "intl/icu/source/i18n/csr2022.cpp",
    "intl/icu/source/i18n/csrecog.cpp",
    "intl/icu/source/i18n/csrmbcs.cpp",
    "intl/icu/source/i18n/csrsbcs.cpp",
    "intl/icu/source/i18n/csrucode.cpp",
    "intl/icu/source/i18n/csrutf8.cpp",
    "intl/icu/source/i18n/inputext.cpp",
    "intl/icu/source/i18n/ucsdet.cpp",
    # Cluster
    "intl/icu/source/i18n/alphaindex.cpp",
    "intl/icu/source/i18n/ulocdata.cpp",
}
123
124
def ensure_source_file_exists(dir, filename):
    """Return the path of `filename` inside `dir`, requiring that it exists.

    The two components are combined with mozpath.join().

    Raises:
        Exception: if the combined path is not an existing file according
            to os.path.isfile().
    """
    candidate = mozpath.join(dir, filename)
    if not os.path.isfile(candidate):
        raise Exception("Couldn't find source file for: %s" % filename)
    return candidate
130
131
def get_sources(sources_file):
    """Return the source files named in `sources_file`, sorted ignoring case.

    `sources_file` is read as a list of file names, one per line, each
    relative to the directory containing `sources_file`. Surrounding
    whitespace is stripped from every line, and lines that are empty after
    stripping (e.g. a trailing blank line) are skipped rather than being
    reported as a missing source file with an empty name.

    Returns:
        A list of paths as returned by ensure_source_file_exists(), ordered
        by their lower-cased form.

    Raises:
        Exception: propagated from ensure_source_file_exists() when a
            listed file does not exist.
    """
    srcdir = os.path.dirname(sources_file)
    # Read with an explicit encoding, matching how write_sources() writes
    # its output, so the result does not depend on the locale.
    with open(sources_file, encoding="utf-8") as f:
        names = [line.strip() for line in f]
    return sorted(
        (ensure_source_file_exists(srcdir, name) for name in names if name),
        key=lambda x: x.lower(),
    )
139
140
def list_headers(path):
    """Return the files directly inside `path`, sorted ignoring case.

    Each result is `path` combined with an entry name via mozpath.join();
    entries for which os.path.isfile() is false are left out.
    """
    candidates = (mozpath.join(path, entry) for entry in os.listdir(path))
    headers = [candidate for candidate in candidates if os.path.isfile(candidate)]
    headers.sort(key=lambda header: header.lower())
    return headers
148
149
def write_sources(mozbuild, sources, headers):
    """Write the generated `mozbuild` file listing `sources` and `headers`.

    The file starts with a "generated, do not edit" banner, followed by up
    to three list assignments, each written only when it has entries:

    * ``sources``: entries of `sources` that are not in UNUSED_SOURCES,
    * ``other_sources``: entries of `sources` that are in UNUSED_SOURCES,
    * ``EXPORTS.unicode``: the entries of `headers` (may be None or empty).

    All-lowercase list names are assigned with ``=``, any other name with
    ``+=``. Every entry is written single-quoted with a leading ``/``. The
    file is written as UTF-8 with ``\\n`` line endings on every platform.
    """
    with open(mozbuild, "w", newline="\n", encoding="utf-8") as f:
        f.write(
            "# THIS FILE IS GENERATED BY /intl/icu_sources_data.py " "DO NOT EDIT\n"
        )

        sections = (
            ("sources", [s for s in sources if s not in UNUSED_SOURCES]),
            ("other_sources", [s for s in sources if s in UNUSED_SOURCES]),
            ("EXPORTS.unicode", headers),
        )
        for name, content in sections:
            if not content:
                # Empty (or None) lists are omitted from the file entirely.
                continue
            assignment = "=" if name.islower() else "+="
            f.write("%s %s [\n" % (name, assignment))
            f.write("".join("   '/%s',\n" % entry for entry in content))
            f.write("]\n")
165
166
def update_sources(topsrcdir):
    """Regenerate sources.mozbuild for each ICU source directory handled here.

    For every directory in the fixed list below, the entries of its
    sources.txt are resolved via get_sources() and, when the directory has
    a unicode/ subdirectory, the files in it are collected via
    list_headers(). Both sets of paths are made relative to `topsrcdir`
    and handed to write_sources(), targeting
    config/external/icu/<last path component>/sources.mozbuild.
    """
    print("Updating ICU sources lists...")
    for component in ("common", "i18n", "tools/toolutil", "tools/icupkg"):
        component_dir = mozpath.join(topsrcdir, "intl/icu/source/%s" % component)
        output = mozpath.join(
            topsrcdir,
            "config/external/icu/%s/sources.mozbuild" % mozpath.basename(component),
        )

        listed = get_sources(mozpath.join(component_dir, "sources.txt"))
        sources = [mozpath.relpath(path, topsrcdir) for path in listed]

        # headers stays None when the directory has no unicode/ subdirectory.
        headers = None
        unicode_dir = mozpath.join(component_dir, "unicode")
        if os.path.exists(unicode_dir):
            headers = [
                mozpath.normsep(os.path.relpath(header, topsrcdir))
                for header in list_headers(unicode_dir)
            ]

        write_sources(output, sources, headers)
185
186
def try_run(name, command, cwd=None, **kwargs):
    """Run `command`, capturing its output in a temporary log file.

    stdout and stderr of the command are both redirected into a named
    temporary file whose name starts with `name`. Extra keyword arguments
    are forwarded to subprocess.check_call().

    Returns:
        True if the command exits successfully; the log file is then
        deleted. False if it exits with a non-zero status; the log file is
        kept and its location is printed to stderr.
    """
    log = tempfile.NamedTemporaryFile(prefix=name, delete=False)
    try:
        # The log is closed on leaving the `with`, before it is reported
        # or deleted below.
        with log:
            subprocess.check_call(
                command, cwd=cwd, stdout=log, stderr=subprocess.STDOUT, **kwargs
            )
    except subprocess.CalledProcessError:
        message = 'Error running "{}" in directory {}\n    See output in {}'.format(
            " ".join(command), cwd, log.name
        )
        print(message, file=sys.stderr)
        return False

    os.unlink(log.name)
    return True
205
206
def get_data_file(data_dir):
    """Return the path of an icudt*.dat file in `data_dir`, or None.

    If several files match, the first one in glob.glob() order is returned.
    """
    matches = glob.glob(mozpath.join(data_dir, "icudt*.dat"))
    if not matches:
        return None
    return matches[0]
210
211
def update_data_file(topsrcdir):
    """Build ICU in a temporary directory and copy its data file in-tree.

    Runs ICU's configure script and then make inside a fresh temporary
    object directory, locates the icudt*.dat file under its data/out
    directory and copies it into config/external/icu/data/ below
    `topsrcdir`. If the new file's name differs from the existing in-tree
    data file, the old one is deleted first.

    Returns:
        True on success. False, after printing a message to stderr, if
        configure or make fails, or if either the existing in-tree data
        file or the newly built one cannot be found. The temporary object
        directory is not removed on those failure paths.
    """
    objdir = tempfile.mkdtemp(prefix="icu-obj-")
    configure = mozpath.join(topsrcdir, "intl/icu/source/configure")
    env = dict(os.environ)
    # bug 1262101 - these should be shared with the moz.build files
    #
    # The flags are joined with " " rather than concatenated by hand so
    # that each -D option is guaranteed to be its own token. A missing
    # separator would fuse two options into one, leaving the second macro
    # undefined and giving the first a bogus value.
    env["CPPFLAGS"] = " ".join(
        [
            "-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1",
            "-DU_HIDE_OBSOLETE_UTF_OLD_H=1",
            "-DUCONFIG_NO_LEGACY_CONVERSION",
            "-DUCONFIG_NO_TRANSLITERATION",
            "-DUCONFIG_NO_REGULAR_EXPRESSIONS",
            "-DUCONFIG_NO_BREAK_ITERATION",
            "-DU_CHARSET_IS_UTF8",
        ]
    )

    # Exclude data that we currently don't need.
    #
    # The file format for ICU's data build tool is described at
    # <https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md>.
    env["ICU_DATA_FILTER_FILE"] = mozpath.join(topsrcdir, "intl/icu/data_filter.json")

    print("Running ICU configure...")
    if not try_run(
        "icu-configure",
        [
            "sh",
            configure,
            "--with-data-packaging=archive",
            "--enable-static",
            "--disable-shared",
            "--disable-extras",
            "--disable-icuio",
            "--disable-layout",
            "--disable-layoutex",
            "--disable-tests",
            "--disable-samples",
            "--disable-strict",
        ],
        cwd=objdir,
        env=env,
    ):
        return False
    print("Running ICU make...")
    if not try_run(
        "icu-make",
        ["make", "--jobs=%d" % multiprocessing.cpu_count(), "--output-sync"],
        cwd=objdir,
    ):
        return False
    print("Copying ICU data file...")
    tree_data_path = mozpath.join(topsrcdir, "config/external/icu/data/")
    old_data_file = get_data_file(tree_data_path)
    if not old_data_file:
        print("Error: no ICU data file in %s" % tree_data_path, file=sys.stderr)
        return False
    new_data_file = get_data_file(mozpath.join(objdir, "data/out"))
    if not new_data_file:
        print("Error: no ICU data in ICU objdir", file=sys.stderr)
        return False
    if os.path.basename(old_data_file) != os.path.basename(new_data_file):
        # Data file name has the major version number embedded.
        os.unlink(old_data_file)
    shutil.copy(new_data_file, tree_data_path)
    # Cleanup is best-effort: the data file has already been copied, so a
    # failure to delete the object directory only warrants a warning.
    try:
        shutil.rmtree(objdir)
    except Exception:
        print("Warning: failed to remove %s" % objdir, file=sys.stderr)
    return True
284
285
def main():
    """Command-line entry point: refresh the source lists and the data file.

    Expects exactly one argument, the top source directory. Exits with
    status 1 on a usage error or when update_data_file() reports failure.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        print("Usage: icu_sources_data.py <mozilla topsrcdir>", file=sys.stderr)
        sys.exit(1)

    topsrcdir = mozpath.abspath(args[0])
    update_sources(topsrcdir)
    if update_data_file(topsrcdir):
        return

    print("Error updating ICU data file", file=sys.stderr)
    sys.exit(1)


if __name__ == "__main__":
    main()
300