package org.unicode.cldr.unittest;

import java.util.Set;
import java.util.TreeSet;

import org.unicode.cldr.util.VariantFolder;

import com.ibm.icu.text.CanonicalIterator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;

/**
 * Command-line driver that prints the closures produced by
 * {@link VariantFolder} for a handful of sample inputs.
 *
 * <p>It runs two passes:
 * <ol>
 * <li>a {@code CaseVariantFolder} pass over a few literal strings, printing
 * each closure and then the same closure expanded with every string returned
 * by a {@link CanonicalIterator};</li>
 * <li>a {@code CompatibilityFolder} pass over several {@link UnicodeSet}
 * patterns, printing whatever the closure adds beyond the source set.</li>
 * </ol>
 *
 * <p>All results go to {@code System.out}; nothing is asserted.
 */
public class TestVariantFolder {

    /**
     * Runs both passes and prints their results.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        VariantFolder variantFolder = new VariantFolder(
            new VariantFolder.CaseVariantFolder());
        String[] tests = { "abc", "aß", "\uFB01sh", "Åbë" };
        for (String test : tests) {
            Set<String> set = variantFolder.getClosure(test);
            // Copy into a TreeSet so the printed order is sorted.
            System.out.println(test + "\t" + set.size() + "\t"
                + new TreeSet<String>(set));
            final Set<String> closed = closeUnderCanonicalization(set,
                new TreeSet<String>());
            System.out.println(test + "\t" + closed.size() + "\t" + closed);
        }

        variantFolder = new VariantFolder(
            new VariantFolder.CompatibilityFolder());
        String[] testSets = {
            "[:Word_Break=ExtendNumLet:]",
            "[:Word_Break=Format:]",
            "[:Word_Break=Katakana:]",
            "[[:Word_Break=MidLetter:]\u2018]",
            "[:Word_Break=MidNum:]",
            "[[:Word_Break=MidNum:]-[\\uFE13]]",
            "[:Word_Break=Numeric:]",
            "[\\u0027\\u2018\\u2019\\u002e]",
        };
        for (String testSet : testSets) {
            UnicodeSet source = new UnicodeSet(testSet);

            // Union of the closures of every string in the source set.
            Set<String> target = new TreeSet<String>();
            for (UnicodeSetIterator it = new UnicodeSetIterator(source); it.next();) {
                Set<String> closure = variantFolder.getClosure(it.getString());
                target.addAll(closure);
            }

            UnicodeSet utarget = new UnicodeSet();
            utarget.addAll(target);
            // Print only what the closure contributed beyond the source set.
            // utarget is not used afterwards, so it can be modified in place.
            System.out.println(testSet + " => " + utarget.removeAll(source));
        }
    }

    /**
     * Iterator shared by every call to
     * {@link #closeUnderCanonicalization(Set, Set)}; its source is reset for
     * each item. The spelling of the field name is kept as-is because the
     * field is package-visible and may be referenced elsewhere.
     */
    static CanonicalIterator canonicalterator = new CanonicalIterator("");

    /**
     * Adds to {@code output} every string that the shared
     * {@link CanonicalIterator} returns for each element of {@code source}.
     *
     * @param source strings to expand; not modified
     * @param output collection that receives the results; existing contents
     *               are kept
     * @return the {@code output} set that was passed in
     */
    static Set<String> closeUnderCanonicalization(Set<String> source,
        Set<String> output) {
        for (String item : source) {
            canonicalterator.setSource(item);
            // The loop treats a null return from next() as end of iteration.
            for (String equiv = canonicalterator.next(); equiv != null;
                equiv = canonicalterator.next()) {
                output.add(equiv);
            }
        }
        return output;
    }
}