#!/usr/bin/env python
#
# Copyright 2011-2018 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script uses the following Unicode tables:
# - DerivedNormalizationProps.txt
# - NormalizationTest.txt
# - UnicodeData.txt
# - StandardizedVariants.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
import collections
import urllib.request

UNICODE_VERSION = "13.0.0"
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION

PREAMBLE = """// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly

#![allow(missing_docs)]
"""

NormalizationTest = collections.namedtuple(
    "NormalizationTest",
    ["source", "nfc", "nfd", "nfkc", "nfkd"],
)

# Mapping taken from Table 12 of:
# http://www.unicode.org/reports/tr44/#General_Category_Values
expanded_categories = {
    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
    'Lm': ['L'], 'Lo': ['L'],
    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
    'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}
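# Only the 'M' (Mark) expansion is actually consulted below, when collecting
# general_category_mark from UnicodeData.txt.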

# Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior
# http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior
S_BASE, L_COUNT, V_COUNT, T_COUNT = 0xAC00, 19, 21, 28
S_COUNT = L_COUNT * V_COUNT * T_COUNT
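
# Illustrative sketch only; this script never decomposes Hangul itself (it skips
# the syllable block and leaves that work to the runtime side). The arithmetic
# below follows Section 3.12; l_base/v_base/t_base are the standard jamo bases.
def _example_hangul_decompose(s):
    # e.g. _example_hangul_decompose(0xAC01) == [0x1100, 0x1161, 0x11A8]
    l_base, v_base, t_base = 0x1100, 0x1161, 0x11A7
    s_index = s - S_BASE
    assert 0 <= s_index < S_COUNT, "not a precomposed Hangul syllable"
    jamo = [
        l_base + s_index // (V_COUNT * T_COUNT),
        v_base + (s_index % (V_COUNT * T_COUNT)) // T_COUNT,
    ]
    if s_index % T_COUNT != 0:
        jamo.append(t_base + s_index % T_COUNT)
    return jamo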

class UnicodeData(object):
    def __init__(self):
        self._load_unicode_data()
        self.norm_props = self._load_norm_props()
        self.norm_tests = self._load_norm_tests()

        self.canon_comp = self._compute_canonical_comp()
        self.canon_fully_decomp, self.compat_fully_decomp = self._compute_fully_decomposed()

        self.cjk_compat_variants_fully_decomp = {}
        self._load_cjk_compat_ideograph_variants()

        def stats(name, table):
            count = sum(len(v) for v in table.values())
            print("%s: %d chars => %d decomposed chars" % (name, len(table), count))

        print("Decomposition table stats:")
        stats("Canonical decomp", self.canon_decomp)
        stats("Compatible decomp", self.compat_decomp)
        stats("Canonical fully decomp", self.canon_fully_decomp)
        stats("Compatible fully decomp", self.compat_fully_decomp)
        stats("CJK Compat Variants fully decomp", self.cjk_compat_variants_fully_decomp)

        self.ss_leading, self.ss_trailing = self._compute_stream_safe_tables()

    def _fetch(self, filename):
        resp = urllib.request.urlopen(UCD_URL + filename)
        return resp.read().decode('utf-8')

    def _load_unicode_data(self):
        self.name_to_char_int = {}
        self.combining_classes = {}
        self.compat_decomp = {}
        self.canon_decomp = {}
        self.general_category_mark = []
        self.general_category_public_assigned = []

        assigned_start = 0
        prev_char_int = -1
        prev_name = ""

        for line in self._fetch("UnicodeData.txt").splitlines():
            # See ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
            pieces = line.split(';')
            assert len(pieces) == 15
            char, category, cc, decomp = pieces[0], pieces[2], pieces[3], pieces[5]
            char_int = int(char, 16)

            name = pieces[1].strip()
            self.name_to_char_int[name] = char_int

            if cc != '0':
                self.combining_classes[char_int] = cc

            if decomp.startswith('<'):
                self.compat_decomp[char_int] = [int(c, 16) for c in decomp.split()[1:]]
            elif decomp != '':
                self.canon_decomp[char_int] = [int(c, 16) for c in decomp.split()]

            if category == 'M' or 'M' in expanded_categories.get(category, []):
                self.general_category_mark.append(char_int)

            assert category != 'Cn', "Unexpected: Unassigned codepoint in UnicodeData.txt"
            if category not in ['Co', 'Cs']:
                if char_int != prev_char_int + 1 and not is_first_and_last(prev_name, name):
                    self.general_category_public_assigned.append((assigned_start, prev_char_int))
                    assigned_start = char_int
                prev_char_int = char_int
                prev_name = name

        self.general_category_public_assigned.append((assigned_start, prev_char_int))

    def _load_cjk_compat_ideograph_variants(self):
        for line in self._fetch("StandardizedVariants.txt").splitlines():
            strip_comments = line.split('#', 1)[0].strip()
            if not strip_comments:
                continue

            variation_sequence, description, differences = strip_comments.split(';')
            description = description.strip()

            # Don't use variations that only apply in particular shaping environments.
            if differences:
                continue

            # Look for entries where the description field is a codepoint name.
            if description not in self.name_to_char_int:
                continue

            # Only consider the CJK Compatibility Ideographs.
            if not description.startswith('CJK COMPATIBILITY IDEOGRAPH-'):
                continue

            char_int = self.name_to_char_int[description]

            assert char_int not in self.combining_classes, "Unexpected: CJK compat variant with a combining class"
            assert char_int not in self.compat_decomp, "Unexpected: CJK compat variant and compatibility decomposition"
            assert len(self.canon_decomp[char_int]) == 1, "Unexpected: CJK compat variant and non-singleton canonical decomposition"
            # If we ever need to handle Hangul here, we'll need to handle it separately.
            assert not (S_BASE <= char_int < S_BASE + S_COUNT)

            cjk_compat_variant_parts = [int(c, 16) for c in variation_sequence.split()]
            for c in cjk_compat_variant_parts:
                assert c not in self.canon_decomp, "Unexpected: CJK compat variant is unnormalized (canon)"
                assert c not in self.compat_decomp, "Unexpected: CJK compat variant is unnormalized (compat)"
            self.cjk_compat_variants_fully_decomp[char_int] = cjk_compat_variant_parts

    def _load_norm_props(self):
        props = collections.defaultdict(list)

        for line in self._fetch("DerivedNormalizationProps.txt").splitlines():
            (prop_data, _, _) = line.partition("#")
            prop_pieces = prop_data.split(";")

            if len(prop_pieces) < 2:
                continue

            assert len(prop_pieces) <= 3
            (low, _, high) = prop_pieces[0].strip().partition("..")

            prop = prop_pieces[1].strip()

            data = None
            if len(prop_pieces) == 3:
                data = prop_pieces[2].strip()

            props[prop].append((low, high, data))

        return props

    def _load_norm_tests(self):
        tests = []
        for line in self._fetch("NormalizationTest.txt").splitlines():
            (test_data, _, _) = line.partition("#")
            test_pieces = test_data.split(";")

            if len(test_pieces) < 5:
                continue

            source, nfc, nfd, nfkc, nfkd = [[c.strip() for c in p.split()] for p in test_pieces[:5]]
            tests.append(NormalizationTest(source, nfc, nfd, nfkc, nfkd))

        return tests

    def _compute_canonical_comp(self):
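        # Inverts the canonical decompositions into a (first, second) -> composed
        # map, e.g. (0x0041, 0x0300) -> 0x00C0 (A + combining grave), skipping
        # anything whose composed form is listed in Full_Composition_Exclusion.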
        canon_comp = {}
        comp_exclusions = [
            (int(low, 16), int(high or low, 16))
            for low, high, _ in self.norm_props["Full_Composition_Exclusion"]
        ]
        for char_int, decomp in self.canon_decomp.items():
            if any(lo <= char_int <= hi for lo, hi in comp_exclusions):
                continue

            assert len(decomp) == 2
            assert (decomp[0], decomp[1]) not in canon_comp
            canon_comp[(decomp[0], decomp[1])] = char_int

        return canon_comp

    def _compute_fully_decomposed(self):
        """
        Even though the decomposition algorithm is recursive, it is possible
        to precompute the recursion at table generation time with a modest
        increase in table size.  For these precomputed tables, we note that
        1) the characters with a canonical decomposition are a subset of those
        with a compatibility decomposition and 2) the two decompositions mostly
        agree on their intersection.  Therefore, we don't store entries in the
        compatibility table for characters that decompose the same way under
        canonical decomposition.

            Decomposition table stats:
            Canonical decomp: 2060 chars => 3085 decomposed chars
            Compatible decomp: 3662 chars => 5440 decomposed chars
            Canonical fully decomp: 2060 chars => 3404 decomposed chars
            Compatible fully decomp: 3678 chars => 5599 decomposed chars

        The upshot is that the decomposition code is very simple and easy to
        inline, at a mild code-size cost.
        """
        def _decompose(char_int, compatible):
            # 7-bit ASCII never decomposes
            if char_int <= 0x7f:
                yield char_int
                return

            # Assert that we're handling Hangul separately.
            assert not (S_BASE <= char_int < S_BASE + S_COUNT)

            decomp = self.canon_decomp.get(char_int)
            if decomp is not None:
                for decomposed_ch in decomp:
                    for fully_decomposed_ch in _decompose(decomposed_ch, compatible):
                        yield fully_decomposed_ch
                return

            if compatible and char_int in self.compat_decomp:
                for decomposed_ch in self.compat_decomp[char_int]:
                    for fully_decomposed_ch in _decompose(decomposed_ch, compatible):
                        yield fully_decomposed_ch
                return

            yield char_int
            return

        end_codepoint = max(
            max(self.canon_decomp.keys()),
            max(self.compat_decomp.keys()),
        )

        canon_fully_decomp = {}
        compat_fully_decomp = {}

        for char_int in range(0, end_codepoint + 1):
            # Always skip Hangul, since it's more efficient to represent its
            # decomposition programmatically.
            if S_BASE <= char_int < S_BASE + S_COUNT:
                continue

            canon = list(_decompose(char_int, False))
            if not (len(canon) == 1 and canon[0] == char_int):
                canon_fully_decomp[char_int] = canon

            compat = list(_decompose(char_int, True))
            if not (len(compat) == 1 and compat[0] == char_int):
                compat_fully_decomp[char_int] = compat

        # Since canon_fully_decomp is a subset of compat_fully_decomp, we don't
        # need to store their overlap when they agree.  When they don't agree,
        # store the decomposition in the compatibility table since we'll check
        # that first when normalizing to NFKD.
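        # For example, U+1E9B (LATIN SMALL LETTER LONG S WITH DOT ABOVE) fully
        # decomposes to [0x17F, 0x307] canonically but to [0x73, 0x307] under
        # compatibility, so both entries are kept, whereas U+00C0 decomposes to
        # [0x41, 0x300] either way and only the canonical entry survives.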
        assert set(canon_fully_decomp) <= set(compat_fully_decomp)

        for ch in set(canon_fully_decomp) & set(compat_fully_decomp):
            if canon_fully_decomp[ch] == compat_fully_decomp[ch]:
                del compat_fully_decomp[ch]

        return canon_fully_decomp, compat_fully_decomp

    def _compute_stream_safe_tables(self):
        """
        To make a text stream-safe with the Stream-Safe Text Process (UAX15-D4),
        we need to be able to know the number of contiguous non-starters *after*
        applying compatibility decomposition to each character.

        We can do this incrementally by computing the number of leading and
        trailing non-starters for each character's compatibility decomposition
        with the following rules:

        1) If a character is not affected by compatibility decomposition, look
           up its canonical combining class to find out if it's a non-starter.
        2) All Hangul characters are starters, even under decomposition.
        3) Otherwise, very few decomposing characters have a nonzero count
           of leading or trailing non-starters, so store these characters
           with their associated counts in a separate table.
        """
        leading_nonstarters = {}
        trailing_nonstarters = {}

        for c in set(self.canon_fully_decomp) | set(self.compat_fully_decomp):
            decomposed = self.compat_fully_decomp.get(c) or self.canon_fully_decomp[c]

            num_leading = 0
            for d in decomposed:
                if d not in self.combining_classes:
                    break
                num_leading += 1

            num_trailing = 0
            for d in reversed(decomposed):
                if d not in self.combining_classes:
                    break
                num_trailing += 1

            if num_leading > 0:
                leading_nonstarters[c] = num_leading
            if num_trailing > 0:
                trailing_nonstarters[c] = num_trailing

        return leading_nonstarters, trailing_nonstarters

hexify = lambda c: '{:04X}'.format(c)

# Test whether `first` and `last` are corresponding "<..., First>" and
# "<..., Last>" markers.
def is_first_and_last(first, last):
    if not first.startswith('<') or not first.endswith(', First>'):
        return False
    if not last.startswith('<') or not last.endswith(', Last>'):
        return False
    return first[1:-8] == last[1:-7]

def gen_mph_data(name, d, kv_type, kv_callback):
    (salt, keys) = minimal_perfect_hash(d)
    out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
    for s in salt:
        out.write("    0x{:x},\n".format(s))
    out.write("];\n")
    out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
    for k in keys:
        out.write("    {},\n".format(kv_callback(k)))
    out.write("];\n\n")
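# For reference, gen_mph_data('combining_mark', ..., 'u32', ...) emits output
# shaped roughly like:
#
#     pub(crate) const COMBINING_MARK_SALT: &[u16] = &[
#         0x1,
#         ...
#     ];
#     pub(crate) const COMBINING_MARK_KV: &[u32] = &[
#         0x0300,
#         ...
#     ];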

def gen_combining_class(combining_classes, out):
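    # Each entry packs the combining class into the low 8 bits and the
    # codepoint into the bits above it: (codepoint << 8) | class.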
    gen_mph_data('canonical_combining_class', combining_classes, 'u32',
        lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))

def gen_composition_table(canon_comp, out):
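    # Pairs where both characters are in the BMP are packed into a single u32
    # key, (c1 << 16) | c2, and emitted through the hash table; pairs where
    # both are astral are emitted as match arms in composition_table_astral below.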
    table = {}
    for (c1, c2), c3 in canon_comp.items():
        if c1 < 0x10000 and c2 < 0x10000:
            table[(c1 << 16) | c2] = c3
    gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
        lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))

    out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
    out.write("    match (c1, c2) {\n")
    for (c1, c2), c3 in sorted(canon_comp.items()):
        if c1 >= 0x10000 and c2 >= 0x10000:
            out.write("        ('\\u{%s}', '\\u{%s}') => Some('\\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))

    out.write("        _ => None,\n")
    out.write("    }\n")
    out.write("}\n")

def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_decomp, out):
    tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility'), (cjk_compat_variants_decomp, 'cjk_compat_variants')]
    for table, name in tables:
        gen_mph_data(name + '_decomposed', table, "(u32, &'static [char])",
            lambda k: "(0x{:x}, &[{}])".format(k,
                ", ".join("'\\u{%s}'" % hexify(c) for c in table[k])))

def gen_qc_match(prop_table, out):
    out.write("    match c {\n")

    for low, high, data in prop_table:
        assert data in ('N', 'M')
        result = "No" if data == 'N' else "Maybe"
        if high:
            out.write(r"        '\u{%s}'...'\u{%s}' => %s," % (low, high, result))
        else:
            out.write(r"        '\u{%s}' => %s," % (low, result))
        out.write("\n")

    out.write("        _ => Yes,\n")
    out.write("    }\n")

def gen_nfc_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFC_QC'], out)
    out.write("}\n")

def gen_nfkc_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfkc(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFKC_QC'], out)
    out.write("}\n")

def gen_nfd_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfd(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFD_QC'], out)
    out.write("}\n")

def gen_nfkd_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfkd(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFKD_QC'], out)
    out.write("}\n")

def gen_combining_mark(general_category_mark, out):
    gen_mph_data('combining_mark', general_category_mark, 'u32',
        lambda k: '0x{:04x}'.format(k))

def gen_public_assigned(general_category_public_assigned, out):
    # This could be done as a hash but the table is somewhat small.
    out.write("#[inline]\n")
    out.write("pub fn is_public_assigned(c: char) -> bool {\n")
    out.write("    match c {\n")

    start = True
    for first, last in general_category_public_assigned:
        if start:
            out.write("        ")
            start = False
        else:
            out.write("        | ")
        if first == last:
            out.write("'\\u{%s}'\n" % hexify(first))
        else:
            out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
    out.write("        => true,\n")

    out.write("        _ => false,\n")
    out.write("    }\n")
    out.write("}\n")
    out.write("\n")

def gen_stream_safe(leading, trailing, out):
    # This could be done as a hash but the table is very small.
    out.write("#[inline]\n")
    out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
    out.write("    match c {\n")

    for char, num_leading in sorted(leading.items()):
        out.write("        '\\u{%s}' => %d,\n" % (hexify(char), num_leading))

    out.write("        _ => 0,\n")
    out.write("    }\n")
    out.write("}\n")
    out.write("\n")

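    # The trailing table uses the same packed layout as the combining-class
    # table: the count in the low 8 bits, the codepoint shifted above it.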
    gen_mph_data('trailing_nonstarters', trailing, 'u32',
        lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))

def gen_tests(tests, out):
    out.write("""#[derive(Debug)]
pub struct NormalizationTest {
    pub source: &'static str,
    pub nfc: &'static str,
    pub nfd: &'static str,
    pub nfkc: &'static str,
    pub nfkd: &'static str,
}

""")

    out.write("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n")
    str_literal = lambda s: '"%s"' % "".join("\\u{%s}" % c for c in s)

    for test in tests:
        out.write("    NormalizationTest {\n")
        out.write("        source: %s,\n" % str_literal(test.source))
        out.write("        nfc: %s,\n" % str_literal(test.nfc))
        out.write("        nfd: %s,\n" % str_literal(test.nfd))
        out.write("        nfkc: %s,\n" % str_literal(test.nfkc))
        out.write("        nfkd: %s,\n" % str_literal(test.nfkd))
        out.write("    },\n")

    out.write("];\n")

# Guaranteed to be less than n.
def my_hash(x, salt, n):
    # Multiplicative hashing: y is a 32-bit mix of x and the salt, and
    # (y * n) >> 32 maps it onto 0..n-1 (since y < 2**32, y * n // 2**32 < n).
    mask_32 = 0xffffffff
    y = ((x + salt) * 2654435769) & mask_32
    y ^= (x * 0x31415926) & mask_32
    return (y * n) >> 32

# Compute a minimal perfect hash function; `d` can be either a dict or a list of keys.
def minimal_perfect_hash(d):
    n = len(d)
    buckets = dict((h, []) for h in range(n))
    for key in d:
        h = my_hash(key, 0, n)
        buckets[h].append(key)
    bsorted = [(len(buckets[h]), h) for h in range(n)]
    bsorted.sort(reverse=True)
    claimed = [False] * n
    salts = [0] * n
    keys = [0] * n
    for (bucket_size, h) in bsorted:
        # Note: the traditional perfect hashing approach would also special-case
        # bucket_size == 1 here and assign any empty slot, rather than iterating
        # until rehash finds an empty slot. But we're not doing that so we can
        # avoid the branch.
        if bucket_size == 0:
            break
        else:
            for salt in range(1, 32768):
                rehashes = [my_hash(key, salt, n) for key in buckets[h]]
                # Make sure there are no rehash collisions within this bucket.
                if all(not claimed[r] for r in rehashes):
                    if len(set(rehashes)) < bucket_size:
                        continue
                    salts[h] = salt
                    for key in buckets[h]:
                        rehash = my_hash(key, salt, n)
                        claimed[rehash] = True
                        keys[rehash] = key
                    break
            if salts[h] == 0:
                print("minimal perfect hashing failed")
                # Note: if this happens (because of unfortunate data), then there are
                # a few things that could be done. First, the hash function could be
                # tweaked. Second, the bucket order could be scrambled (especially the
                # singletons). Right now, the buckets are sorted, which has the advantage
                # of being deterministic.
                #
                # As a more extreme approach, the singleton bucket optimization could be
                # applied (give the direct address for singleton buckets, rather than
                # relying on a rehash). That is definitely the more standard approach in
                # the minimal perfect hashing literature, but in testing the branch was a
                # significant slowdown.
                exit(1)
    return (salts, keys)
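
# Sketch of the two-level lookup that the generated SALT/KV tables support
# (shown in Python purely for illustration; the actual lookup lives on the
# Rust side and may differ in detail):
def _example_mph_lookup(x, salts, keys):
    # The salt-0 hash picks a bucket salt; rehashing with that salt picks the
    # final slot, which must then be verified against the stored key.
    n = len(salts)
    idx = my_hash(x, salts[my_hash(x, 0, n)], n)
    return idx if keys[idx] == x else None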

if __name__ == '__main__':
    data = UnicodeData()
    with open("tables.rs", "w", newline="\n") as out:
        out.write(PREAMBLE)
        out.write("use crate::quick_check::IsNormalized;\n")
        out.write("use crate::quick_check::IsNormalized::*;\n")
        out.write("\n")

        version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
        out.write("#[allow(unused)]\n")
        out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)

        gen_combining_class(data.combining_classes, out)
        out.write("\n")

        gen_composition_table(data.canon_comp, out)
        out.write("\n")

        gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)

        gen_combining_mark(data.general_category_mark, out)
        out.write("\n")

        gen_public_assigned(data.general_category_public_assigned, out)
        out.write("\n")

        gen_nfc_qc(data.norm_props, out)
        out.write("\n")

        gen_nfkc_qc(data.norm_props, out)
        out.write("\n")

        gen_nfd_qc(data.norm_props, out)
        out.write("\n")

        gen_nfkd_qc(data.norm_props, out)
        out.write("\n")

        gen_stream_safe(data.ss_leading, data.ss_trailing, out)
        out.write("\n")

    with open("normalization_tests.rs", "w", newline="\n") as out:
        out.write(PREAMBLE)
        gen_tests(data.norm_tests, out)
