#!/usr/bin/env python
#
# Copyright 2011-2018 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script uses the following Unicode tables:
# - DerivedNormalizationProps.txt
# - NormalizationTest.txt
# - UnicodeData.txt
# - StandardizedVariants.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
import collections
import urllib.request

UNICODE_VERSION = "13.0.0"
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION

PREAMBLE = """// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly

#![allow(missing_docs)]
"""

NormalizationTest = collections.namedtuple(
    "NormalizationTest",
    ["source", "nfc", "nfd", "nfkc", "nfkd"],
)

# Mapping taken from Table 12 from:
# http://www.unicode.org/reports/tr44/#General_Category_Values
# (Only the 'M' groupings are actually consulted below; the previous
# version had a typo mapping 'No' to itself instead of to 'N'.)
expanded_categories = {
    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
    'Lm': ['L'], 'Lo': ['L'],
    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
    'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}

# Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior
# http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior
S_BASE, L_COUNT, V_COUNT, T_COUNT = 0xAC00, 19, 21, 28
S_COUNT = L_COUNT * V_COUNT * T_COUNT


class UnicodeData(object):
    """Fetches the Unicode Character Database files and computes the derived
    tables (canonical composition, fully-precomputed decompositions, and
    stream-safe non-starter counts) consumed by the Rust code generators."""

    def __init__(self):
        self._load_unicode_data()
        self.norm_props = self._load_norm_props()
        self.norm_tests = self._load_norm_tests()

        self.canon_comp = self._compute_canonical_comp()
        self.canon_fully_decomp, self.compat_fully_decomp = self._compute_fully_decomposed()

        self.cjk_compat_variants_fully_decomp = {}
        self._load_cjk_compat_ideograph_variants()

        def stats(name, table):
            # Summarize a decomposition table: entry count and total mapped chars.
            count = sum(len(v) for v in table.values())
            print("%s: %d chars => %d decomposed chars" % (name, len(table), count))

        print("Decomposition table stats:")
        stats("Canonical decomp", self.canon_decomp)
        stats("Compatible decomp", self.compat_decomp)
        stats("Canonical fully decomp", self.canon_fully_decomp)
        stats("Compatible fully decomp", self.compat_fully_decomp)
        stats("CJK Compat Variants fully decomp", self.cjk_compat_variants_fully_decomp)

        self.ss_leading, self.ss_trailing = self._compute_stream_safe_tables()

    def _fetch(self, filename):
        """Download a UCD data file and return its contents as text."""
        resp = urllib.request.urlopen(UCD_URL + filename)
        return resp.read().decode('utf-8')

    def _load_unicode_data(self):
        """Parse UnicodeData.txt into the name, combining-class, and
        decomposition tables, plus the list of assigned public codepoints."""
        self.name_to_char_int = {}
        self.combining_classes = {}
        self.compat_decomp = {}
        self.canon_decomp = {}
        self.general_category_mark = []
        self.general_category_public_assigned = []

        assigned_start = 0
        prev_char_int = -1
        prev_name = ""

        for line in self._fetch("UnicodeData.txt").splitlines():
            # See ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
            pieces = line.split(';')
            assert len(pieces) == 15
            char, name, category, cc, decomp = pieces[0], pieces[1], pieces[2], pieces[3], pieces[5]
            char_int = int(char, 16)

            name = name.strip()
            self.name_to_char_int[name] = char_int

            # An absent entry means combining class 0, so store only nonzero ones.
            if cc != '0':
                self.combining_classes[char_int] = cc

            # A mapping that starts with a "<...>" tag is a compatibility
            # decomposition; otherwise it is canonical.
            if decomp.startswith('<'):
                self.compat_decomp[char_int] = [int(c, 16) for c in decomp.split()[1:]]
            elif decomp != '':
                self.canon_decomp[char_int] = [int(c, 16) for c in decomp.split()]

            if category == 'M' or 'M' in expanded_categories.get(category, []):
                self.general_category_mark.append(char_int)

            assert category != 'Cn', "Unexpected: Unassigned codepoint in UnicodeData.txt"
            # Track contiguous runs of assigned, non-private-use, non-surrogate
            # codepoints. "<..., First>"/"<..., Last>" rows describe a whole
            # range with a single file gap, so they do not break a run.
            if category not in ['Co', 'Cs']:
                if char_int != prev_char_int + 1 and not is_first_and_last(prev_name, name):
                    self.general_category_public_assigned.append((assigned_start, prev_char_int))
                    assigned_start = char_int
                prev_char_int = char_int
                prev_name = name

        # Close the final open run.
        self.general_category_public_assigned.append((assigned_start, prev_char_int))

    def _load_cjk_compat_ideograph_variants(self):
        """Record the CJK Compatibility Ideograph standardized variation
        sequences as decompositions to the variation sequence itself."""
        for line in self._fetch("StandardizedVariants.txt").splitlines():
            strip_comments = line.split('#', 1)[0].strip()
            if not strip_comments:
                continue

            variation_sequence, description, differences = strip_comments.split(';')
            description = description.strip()

            # Don't use variations that only apply in particular shaping environments.
            if differences:
                continue

            # Look for entries where the description field is a codepoint name.
            if description not in self.name_to_char_int:
                continue

            # Only consider the CJK Compatibility Ideographs.
            if not description.startswith('CJK COMPATIBILITY IDEOGRAPH-'):
                continue

            char_int = self.name_to_char_int[description]

            assert char_int not in self.combining_classes, "Unexpected: CJK compat variant with a combining class"
            assert char_int not in self.compat_decomp, "Unexpected: CJK compat variant and compatibility decomposition"
            assert len(self.canon_decomp[char_int]) == 1, "Unexpected: CJK compat variant and non-singleton canonical decomposition"
            # If we ever need to handle Hangul here, we'll need to handle it separately.
            assert not (S_BASE <= char_int < S_BASE + S_COUNT)

            cjk_compat_variant_parts = [int(c, 16) for c in variation_sequence.split()]
            for c in cjk_compat_variant_parts:
                assert c not in self.canon_decomp, "Unexpected: CJK compat variant is unnormalized (canon)"
                assert c not in self.compat_decomp, "Unexpected: CJK compat variant is unnormalized (compat)"
            self.cjk_compat_variants_fully_decomp[char_int] = cjk_compat_variant_parts

    def _load_norm_props(self):
        """Parse DerivedNormalizationProps.txt into a property-name ->
        list of (low, high, data) range tuples (hex strings, high may be '')."""
        props = collections.defaultdict(list)

        for line in self._fetch("DerivedNormalizationProps.txt").splitlines():
            (prop_data, _, _) = line.partition("#")
            prop_pieces = prop_data.split(";")

            if len(prop_pieces) < 2:
                continue

            assert len(prop_pieces) <= 3
            (low, _, high) = prop_pieces[0].strip().partition("..")

            prop = prop_pieces[1].strip()

            data = None
            if len(prop_pieces) == 3:
                data = prop_pieces[2].strip()

            props[prop].append((low, high, data))

        return props

    def _load_norm_tests(self):
        """Parse NormalizationTest.txt into NormalizationTest tuples of
        space-separated hex codepoint lists."""
        tests = []
        for line in self._fetch("NormalizationTest.txt").splitlines():
            (test_data, _, _) = line.partition("#")
            test_pieces = test_data.split(";")

            if len(test_pieces) < 5:
                continue

            source, nfc, nfd, nfkc, nfkd = [[c.strip() for c in p.split()] for p in test_pieces[:5]]
            tests.append(NormalizationTest(source, nfc, nfd, nfkc, nfkd))

        return tests

    def _compute_canonical_comp(self):
        """Invert the canonical decomposition table (minus the composition
        exclusions) into a (first, second) -> composed-character table."""
        canon_comp = {}
        comp_exclusions = [
            (int(low, 16), int(high or low, 16))
            for low, high, _ in self.norm_props["Full_Composition_Exclusion"]
        ]
        for char_int, decomp in self.canon_decomp.items():
            if any(lo <= char_int <= hi for lo, hi in comp_exclusions):
                continue

            # Non-excluded canonical decompositions are always pairs.
            assert len(decomp) == 2
            assert (decomp[0], decomp[1]) not in canon_comp
            canon_comp[(decomp[0], decomp[1])] = char_int

        return canon_comp

    def _compute_fully_decomposed(self):
        """
        Even though the decomposition algorithm is recursive, it is possible
        to precompute the recursion at table generation time with modest
        increase to the table size.  Then, for these precomputed tables, we
        note that 1) compatible decomposition is a subset of canonical
        decomposition and 2) they mostly agree on their intersection.
        Therefore, we don't store entries in the compatible table for
        characters that decompose the same way under canonical decomposition.

        Decomposition table stats:
        Canonical decomp: 2060 chars => 3085 decomposed chars
        Compatible decomp: 3662 chars => 5440 decomposed chars
        Canonical fully decomp: 2060 chars => 3404 decomposed chars
        Compatible fully decomp: 3678 chars => 5599 decomposed chars

        The upshot is that decomposition code is very simple and easy to inline
        at mild code size cost.
        """
        def _decompose(char_int, compatible):
            # 7-bit ASCII never decomposes
            if char_int <= 0x7f:
                yield char_int
                return

            # Assert that we're handling Hangul separately.
            assert not (S_BASE <= char_int < S_BASE + S_COUNT)

            decomp = self.canon_decomp.get(char_int)
            if decomp is not None:
                for decomposed_ch in decomp:
                    yield from _decompose(decomposed_ch, compatible)
                return

            if compatible and char_int in self.compat_decomp:
                for decomposed_ch in self.compat_decomp[char_int]:
                    yield from _decompose(decomposed_ch, compatible)
                return

            yield char_int

        end_codepoint = max(
            max(self.canon_decomp.keys()),
            max(self.compat_decomp.keys()),
        )

        canon_fully_decomp = {}
        compat_fully_decomp = {}

        for char_int in range(0, end_codepoint + 1):
            # Always skip Hangul, since it's more efficient to represent its
            # decomposition programmatically.
            if S_BASE <= char_int < S_BASE + S_COUNT:
                continue

            canon = list(_decompose(char_int, False))
            if not (len(canon) == 1 and canon[0] == char_int):
                canon_fully_decomp[char_int] = canon

            compat = list(_decompose(char_int, True))
            if not (len(compat) == 1 and compat[0] == char_int):
                compat_fully_decomp[char_int] = compat

        # Since canon_fully_decomp is a subset of compat_fully_decomp, we don't
        # need to store their overlap when they agree. When they don't agree,
        # store the decomposition in the compatibility table since we'll check
        # that first when normalizing to NFKD.
        assert set(canon_fully_decomp) <= set(compat_fully_decomp)

        for ch in set(canon_fully_decomp) & set(compat_fully_decomp):
            if canon_fully_decomp[ch] == compat_fully_decomp[ch]:
                del compat_fully_decomp[ch]

        return canon_fully_decomp, compat_fully_decomp

    def _compute_stream_safe_tables(self):
        """
        To make a text stream-safe with the Stream-Safe Text Process (UAX15-D4),
        we need to be able to know the number of contiguous non-starters *after*
        applying compatibility decomposition to each character.

        We can do this incrementally by computing the number of leading and
        trailing non-starters for each character's compatibility decomposition
        with the following rules:

        1) If a character is not affected by compatibility decomposition, look
           up its canonical combining class to find out if it's a non-starter.
        2) All Hangul characters are starters, even under decomposition.
        3) Otherwise, very few decomposing characters have a nonzero count
           of leading or trailing non-starters, so store these characters
           with their associated counts in a separate table.
        """
        leading_nonstarters = {}
        trailing_nonstarters = {}

        for c in set(self.canon_fully_decomp) | set(self.compat_fully_decomp):
            # Prefer the compatibility decomposition; fall back to canonical.
            decomposed = self.compat_fully_decomp.get(c) or self.canon_fully_decomp[c]

            num_leading = 0
            for d in decomposed:
                if d not in self.combining_classes:
                    break
                num_leading += 1

            num_trailing = 0
            for d in reversed(decomposed):
                if d not in self.combining_classes:
                    break
                num_trailing += 1

            if num_leading > 0:
                leading_nonstarters[c] = num_leading
            if num_trailing > 0:
                trailing_nonstarters[c] = num_trailing

        return leading_nonstarters, trailing_nonstarters


def hexify(c):
    """Format a codepoint as at-least-4-digit uppercase hex, no prefix."""
    return '{:04X}'.format(c)


# Test whether `first` and `last` are corresponding "<..., First>" and
# "<..., Last>" markers.
def is_first_and_last(first, last):
    if not first.startswith('<') or not first.endswith(', First>'):
        return False
    if not last.startswith('<') or not last.endswith(', Last>'):
        return False
    # Compare the shared range name with the markers stripped off.
    return first[1:-8] == last[1:-7]


def gen_mph_data(name, d, kv_type, kv_callback):
    """Write the salt and key/value arrays of a minimal perfect hash table.

    NOTE(review): this writes to the module-global `out` opened in __main__
    rather than a passed-in handle like the other generators — consider
    threading the file handle through explicitly.
    """
    (salt, keys) = minimal_perfect_hash(d)
    out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
    for s in salt:
        out.write("    0x{:x},\n".format(s))
    out.write("];\n")
    out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
    for k in keys:
        out.write("    {},\n".format(kv_callback(k)))
    out.write("];\n\n")


def gen_combining_class(combining_classes, out):
    """Write the canonical combining class MPH table (class in the low byte,
    codepoint shifted into the upper bits)."""
    gen_mph_data('canonical_combining_class', combining_classes, 'u32',
        lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))


def gen_composition_table(canon_comp, out):
    """Write the canonical composition tables: an MPH table for BMP pairs and
    a match-based fallback function for astral (non-BMP) pairs."""
    table = {}
    for (c1, c2), c3 in canon_comp.items():
        if c1 < 0x10000 and c2 < 0x10000:
            table[(c1 << 16) | c2] = c3
    # NOTE(review): pairs mixing a BMP and an astral character would land in
    # neither table; as of Unicode 13 no such canonical composition exists —
    # TODO confirm when upgrading UNICODE_VERSION.
    gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
        lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))

    out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
    out.write("    match (c1, c2) {\n")
    for (c1, c2), c3 in sorted(canon_comp.items()):
        if c1 >= 0x10000 and c2 >= 0x10000:
            out.write("        ('\\u{%s}', '\\u{%s}') => Some('\\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))

    out.write("        _ => None,\n")
    out.write("    }\n")
    out.write("}\n")


def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_decomp, out):
    """Write the canonical, compatibility, and CJK-compat-variant
    decomposition MPH tables."""
    tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility'), (cjk_compat_variants_decomp, 'cjk_compat_variants')]
    for table, name in tables:
        gen_mph_data(name + '_decomposed', table, "(u32, &'static [char])",
            lambda k: "(0x{:x}, &[{}])".format(k,
                ", ".join("'\\u{%s}'" % hexify(c) for c in table[k])))


def gen_qc_match(prop_table, out):
    """Write the body of a quick-check function: a match mapping each range
    to No/Maybe, defaulting to Yes."""
    out.write("    match c {\n")

    for low, high, data in prop_table:
        assert data in ('N', 'M')
        result = "No" if data == 'N' else "Maybe"
        if high:
            out.write(r"        '\u{%s}'...'\u{%s}' => %s," % (low, high, result))
        else:
            out.write(r"        '\u{%s}' => %s," % (low, result))
        out.write("\n")

    out.write("        _ => Yes,\n")
    out.write("    }\n")


def gen_nfc_qc(prop_tables, out):
    """Write the NFC quick-check function."""
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFC_QC'], out)
    out.write("}\n")


def gen_nfkc_qc(prop_tables, out):
    """Write the NFKC quick-check function."""
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfkc(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFKC_QC'], out)
    out.write("}\n")


def gen_nfd_qc(prop_tables, out):
    """Write the NFD quick-check function."""
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfd(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFD_QC'], out)
    out.write("}\n")


def gen_nfkd_qc(prop_tables, out):
    """Write the NFKD quick-check function."""
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfkd(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFKD_QC'], out)
    out.write("}\n")


def gen_combining_mark(general_category_mark, out):
    """Write the MPH table of combining-mark (general category M) codepoints."""
    gen_mph_data('combining_mark', general_category_mark, 'u32',
        lambda k: '0x{:04x}'.format(k))


def gen_public_assigned(general_category_public_assigned, out):
    """Write the is_public_assigned function as one big match over ranges."""
    # This could be done as a hash but the table is somewhat small.
    out.write("#[inline]\n")
    out.write("pub fn is_public_assigned(c: char) -> bool {\n")
    out.write("    match c {\n")

    start = True
    for first, last in general_category_public_assigned:
        if start:
            out.write("        ")
            start = False
        else:
            out.write("        | ")
        if first == last:
            out.write("'\\u{%s}'\n" % hexify(first))
        else:
            out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
    out.write("        => true,\n")

    out.write("        _ => false,\n")
    out.write("    }\n")
    out.write("}\n")
    out.write("\n")


def gen_stream_safe(leading, trailing, out):
    """Write the stream-safe leading/trailing non-starter count tables."""
    # This could be done as a hash but the table is very small.
    out.write("#[inline]\n")
    out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
    out.write("    match c {\n")

    for char, num_leading in sorted(leading.items()):
        out.write("        '\\u{%s}' => %d,\n" % (hexify(char), num_leading))

    out.write("        _ => 0,\n")
    out.write("    }\n")
    out.write("}\n")
    out.write("\n")

    gen_mph_data('trailing_nonstarters', trailing, 'u32',
        lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))


def gen_tests(tests, out):
    """Write the NormalizationTest struct and the NORMALIZATION_TESTS array."""
    out.write("""#[derive(Debug)]
pub struct NormalizationTest {
    pub source: &'static str,
    pub nfc: &'static str,
    pub nfd: &'static str,
    pub nfkc: &'static str,
    pub nfkd: &'static str,
}

""")

    out.write("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n")

    def str_literal(s):
        # Render a list of hex codepoint strings as a Rust string literal.
        return '"%s"' % "".join("\\u{%s}" % c for c in s)

    for test in tests:
        out.write("    NormalizationTest {\n")
        out.write("        source: %s,\n" % str_literal(test.source))
        out.write("        nfc: %s,\n" % str_literal(test.nfc))
        out.write("        nfd: %s,\n" % str_literal(test.nfd))
        out.write("        nfkc: %s,\n" % str_literal(test.nfkc))
        out.write("        nfkd: %s,\n" % str_literal(test.nfkd))
        out.write("    },\n")

    out.write("];\n")


# Guaranteed to be less than n.
def my_hash(x, salt, n):
    """Salted multiplicative hash of integer `x` into range [0, n)."""
    # This is hash based on the theory that multiplication is efficient
    mask_32 = 0xffffffff
    y = ((x + salt) * 2654435769) & mask_32
    y ^= (x * 0x31415926) & mask_32
    return (y * n) >> 32


# Compute minimal perfect hash function, d can be either a dict or list of keys.
def minimal_perfect_hash(d):
    """Compute a minimal perfect hash for the keys of `d` (a dict or a list
    of keys).

    Returns (salts, keys): hashing a key with salt 0 selects a bucket, and
    rehashing it with that bucket's salt gives its unique slot in `keys`.
    Exits the script if no salt under 32768 works for some bucket.
    """
    n = len(d)
    buckets = dict((h, []) for h in range(n))
    for key in d:
        h = my_hash(key, 0, n)
        buckets[h].append(key)
    # Process the largest buckets first: they are the hardest to place.
    bsorted = [(len(buckets[h]), h) for h in range(n)]
    bsorted.sort(reverse=True)
    claimed = [False] * n
    salts = [0] * n
    keys = [0] * n
    for (bucket_size, h) in bsorted:
        # Note: the traditional perfect hashing approach would also special-case
        # bucket_size == 1 here and assign any empty slot, rather than iterating
        # until rehash finds an empty slot. But we're not doing that so we can
        # avoid the branch.
        if bucket_size == 0:
            # Buckets are sorted by size, so all remaining buckets are empty.
            break
        for salt in range(1, 32768):
            rehashes = [my_hash(key, salt, n) for key in buckets[h]]
            # Make sure there are no rehash collisions within this bucket.
            if all(not claimed[rehash] for rehash in rehashes):
                if len(set(rehashes)) < bucket_size:
                    continue
                salts[h] = salt
                for key in buckets[h]:
                    rehash = my_hash(key, salt, n)
                    claimed[rehash] = True
                    keys[rehash] = key
                break
        if salts[h] == 0:
            print("minimal perfect hashing failed")
            # Note: if this happens (because of unfortunate data), then there are
            # a few things that could be done. First, the hash function could be
            # tweaked. Second, the bucket order could be scrambled (especially the
            # singletons). Right now, the buckets are sorted, which has the advantage
            # of being deterministic.
            #
            # As a more extreme approach, the singleton bucket optimization could be
            # applied (give the direct address for singleton buckets, rather than
            # relying on a rehash). That is definitely the more standard approach in
            # the minimal perfect hashing literature, but in testing the branch was a
            # significant slowdown.
            #
            # Raise SystemExit directly: the site-provided exit() builtin is not
            # guaranteed to exist in all execution environments.
            raise SystemExit(1)
    return (salts, keys)


if __name__ == '__main__':
    data = UnicodeData()
    with open("tables.rs", "w", newline="\n") as out:
        # NOTE: `out` is deliberately a module-level name here; gen_mph_data
        # writes to it as a global.
        out.write(PREAMBLE)
        out.write("use crate::quick_check::IsNormalized;\n")
        out.write("use crate::quick_check::IsNormalized::*;\n")
        out.write("\n")

        version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
        out.write("#[allow(unused)]\n")
        out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)

        gen_combining_class(data.combining_classes, out)
        out.write("\n")

        gen_composition_table(data.canon_comp, out)
        out.write("\n")

        gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)

        gen_combining_mark(data.general_category_mark, out)
        out.write("\n")

        gen_public_assigned(data.general_category_public_assigned, out)
        out.write("\n")

        gen_nfc_qc(data.norm_props, out)
        out.write("\n")

        gen_nfkc_qc(data.norm_props, out)
        out.write("\n")

        gen_nfd_qc(data.norm_props, out)
        out.write("\n")

        gen_nfkd_qc(data.norm_props, out)
        out.write("\n")

        gen_stream_safe(data.ss_leading, data.ss_trailing, out)
        out.write("\n")

    with open("normalization_tests.rs", "w", newline="\n") as out:
        out.write(PREAMBLE)
        gen_tests(data.norm_tests, out)