1 package org.unicode.cldr.unittest;
2 
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.nio.charset.Charset;
9 import java.util.Collections;
10 import java.util.HashSet;
11 import java.util.LinkedHashSet;
12 import java.util.List;
13 import java.util.Locale;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 import java.util.concurrent.ConcurrentHashMap;
20 import java.util.concurrent.atomic.AtomicInteger;
21 import java.util.regex.Matcher;
22 import java.util.stream.Collectors;
23 import java.util.stream.Stream;
24 
25 import javax.xml.stream.XMLInputFactory;
26 import javax.xml.stream.XMLStreamConstants;
27 import javax.xml.stream.XMLStreamException;
28 import javax.xml.stream.XMLStreamReader;
29 
30 import org.unicode.cldr.tool.VerifyAttributeValues;
31 import org.unicode.cldr.tool.VerifyAttributeValues.Errors;
32 import org.unicode.cldr.util.AttributeValueValidity;
33 import org.unicode.cldr.util.AttributeValueValidity.AttributeValueSpec;
34 import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern;
35 import org.unicode.cldr.util.AttributeValueValidity.Status;
36 import org.unicode.cldr.util.CLDRConfig;
37 import org.unicode.cldr.util.CLDRFile;
38 import org.unicode.cldr.util.CLDRPaths;
39 import org.unicode.cldr.util.ChainedMap;
40 import org.unicode.cldr.util.ChainedMap.M4;
41 import org.unicode.cldr.util.DtdData;
42 import org.unicode.cldr.util.DtdData.ValueStatus;
43 import org.unicode.cldr.util.DtdType;
44 import org.unicode.cldr.util.LanguageInfo;
45 import org.unicode.cldr.util.Organization;
46 import org.unicode.cldr.util.StandardCodes;
47 import org.unicode.cldr.util.StandardCodes.LstrField;
48 import org.unicode.cldr.util.StandardCodes.LstrType;
49 import org.unicode.cldr.util.StripUTF8BOMInputStream;
50 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
51 import org.unicode.cldr.util.Validity;
52 import org.unicode.cldr.util.XPathParts;
53 import org.xml.sax.Attributes;
54 
55 import com.google.common.base.Joiner;
56 import com.google.common.base.Splitter;
57 import com.google.common.collect.ImmutableList;
58 import com.google.common.collect.ImmutableMap;
59 import com.google.common.collect.ImmutableSet;
60 import com.google.common.collect.ImmutableSortedSet;
61 import com.google.common.collect.Multimap;
62 import com.ibm.icu.dev.test.TestFmwk;
63 import com.ibm.icu.impl.Row.R3;
64 import com.ibm.icu.util.ICUException;
65 import com.ibm.icu.util.Output;
66 
67 public class TestAttributeValues extends TestFmwk {
// When true, TestValid walks the files on a sequential stream; otherwise on a parallel stream.
private static final boolean SERIAL = false;

// Shared Validity instance; PathChecker.show reads code statuses from it.
private static final Validity VALIDITY = Validity.getInstance();
// Root directory handed to VerifyAttributeValues.findAttributeValues in oldTestCoreValidity.
private static final File BASE_DIR = new File(CLDRPaths.BASE_DIRECTORY);
// Joins items with a single space.
public static final Joiner SPACE_JOINER = Joiner.on(' ');
// Splits on spaces, trimming each piece and dropping empty pieces.
public static final Splitter SPACE_SPLITTER = Splitter.on(' ').trimResults().omitEmptyStrings();
// Splits on ';', trimming each piece and dropping empty pieces; used by quickTest.
static final Splitter SEMI_SPACE = Splitter.on(';').trimResults().omitEmptyStrings();
// Used by show(String, LanguageInfo) to obtain English display names.
private static final CLDRConfig config = CLDRConfig.getInstance();

// Directory prefixes that TestValid combines with each DtdType's directories.
static final List<String> COMMON_AND_SEED = ImmutableList.of(CLDRPaths.COMMON_DIRECTORY, CLDRPaths.SEED_DIRECTORY);
78 
main(String[] args)79     public static void main(String[] args) {
80         new TestAttributeValues().run(args);
81     }
82 
TestValid()83     public void TestValid() {
84         String dtdTypeArg = params.props == null ? null : (String) params.props.get("dtdtype");
85 
86         // short- circuits for testing. null means do all
87         Set<DtdType> checkTypes = dtdTypeArg == null ? DtdType.STANDARD_SET
88             : Collections.singleton(DtdType.valueOf(dtdTypeArg)) ;
89         ImmutableSet<ValueStatus> showStatuses = null ; // ImmutableSet.of(ValueStatus.invalid, ValueStatus.unknown);
90 
91         for (DtdType dtdType : checkTypes) {
92             PathChecker pathChecker = new PathChecker(this, DtdData.getInstance(dtdType));
93             for (String mainDirs : COMMON_AND_SEED) {
94                 Set<String> files = new TreeSet<>();
95                 for (String stringDir : dtdType.directories) {
96                     addXMLFiles(dtdType, mainDirs + stringDir, files);
97                     if (isVerbose())
98                         synchronized (pathChecker.testLog) {
99                         warnln(mainDirs + stringDir);
100                     }
101                 }
102                 Stream<String> stream = SERIAL ? files.stream() : files.parallelStream();
103                 stream.forEach(file -> checkFile(pathChecker, file));
104 
105 //                for (String file : files) {
106 //                    checkFile(pathChecker, file);
107 //                }
108             }
109             pathChecker.show(isVerbose(), showStatuses);
110         }
111 //        List<String> localesToTest = Arrays.asList("en", "root"); // , "zh", "hi", "ja", "ru", "cy"
112 //        Set<String> localesToTest = config.getCommonAndSeedAndMainAndAnnotationsFactory().getAvailable();
113 //        // TODO, add all other files
114 
115 //        for (String locale : localesToTest) {
116 //            CLDRFile file = config.getCLDRFile(locale, false);
117 //            for (String dpath : file) {
118 //                String path = file.getFullXPath(dpath);
119 //                pathChecker.checkPath(path);
120 //            }
121 //        }
122     }
123 
124 
125     static final Set<String> CLDR_LOCALES = ImmutableSortedSet.copyOf(StandardCodes.make()
126         .getLocaleCoverageLocales(Organization.cldr)
127         .stream()
128         .map(x -> x + ".xml")
129         .collect(Collectors.toSet()));
130 
addXMLFiles(DtdType dtdType, String path, Set<String> files)131     private void addXMLFiles(DtdType dtdType, String path, Set<String> files) {
132         File dirFile = new File(path);
133         if (!dirFile.exists()) {
134             return;
135         }
136         if (!dirFile.isDirectory()) {
137             if (getInclusion() <= 5
138                 && dtdType == DtdType.ldml) {
139                 if (path.contains("/annotationsDerived/")) {
140                     return;
141                 }
142                 String ending = path.substring(path.lastIndexOf('/')+1);
143                 if (!CLDR_LOCALES.contains(ending)) {
144                     return;
145                 }
146             }
147             files.add(path);
148         } else {
149             for (String file : dirFile.list()) {
150                 addXMLFiles(dtdType, path + "/" + file, files);
151             }
152         }
153     }
154 
155 
checkFile(PathChecker pathChecker, String fullFile)156     private void checkFile(PathChecker pathChecker, String fullFile) {
157         if (!fullFile.endsWith(".xml")) {
158             return;
159         }
160         pathChecker.fileCount.incrementAndGet();
161 //        if (isVerbose()) synchronized (this) {
162 //            logln(fullFile);
163 //        }
164         XMLInputFactory f = XMLInputFactory.newInstance();
165 //        XMLInputFactory f = XMLInputFactory.newFactory("org.apache.xerces.jaxp.SAXParserFactoryImpl",
166 //            ClassLoader.getSystemClassLoader());
167 
168         int _elementCount = 0;
169         int _attributeCount = 0;
170 
171         try {
172             // should convert these over to new io.
173             try (InputStream fis0 = new FileInputStream(fullFile);
174                 InputStream fis = new StripUTF8BOMInputStream(fis0);
175                 InputStreamReader inputStreamReader = new InputStreamReader(fis, Charset.forName("UTF-8"));
176                 BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
177                 ) {
178                 XMLStreamReader r = f.createXMLStreamReader(fullFile, bufferedReader);
179                 String element = null;
180                 while(r.hasNext()) {
181                     try {
182                         switch(r.next()){
183                         case XMLStreamConstants.START_ELEMENT:
184                             element = r.getLocalName();
185                             ++_elementCount;
186                             int attributeSize = r.getAttributeCount();
187                             for (int i = 0; i < attributeSize; ++i) {
188                                 ++_attributeCount;
189                                 String attribute = r.getAttributeLocalName(i);
190                                 String attributeValue = r.getAttributeValue(i);
191                                 pathChecker.checkAttribute(element, attribute, attributeValue);
192                             }
193                             break;
194                         }
195                     } catch (XMLStreamException e) {
196                         synchronized (pathChecker.testLog) {
197                             pathChecker.testLog.errln(fullFile + "error");
198                         }
199                         e.printStackTrace(pathChecker.testLog.getLogPrintWriter());
200                     }
201                 }
202                 //XMLFileReader.read("noId", inputStreamReader, -1, true, myHandler);
203             } catch (XMLStreamException e) {
204                 if (!logKnownIssue("cldrbug 10120", "XML reading issue")) {
205                     warnln("Can't read " + fullFile);
206                 } else {
207                     throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fullFile).initCause(e);
208                 }
209             }
210         } catch (Exception e) {
211             throw new ICUException(fullFile, e);
212         }
213         pathChecker.elementCount.addAndGet(_elementCount);
214         pathChecker.attributeCount.addAndGet(_attributeCount);
215     }
216 
217     static class PathChecker {
// Results keyed as status -> element -> attribute -> attribute value -> TRUE.
// Writes in checkAttribute are done while holding this object's monitor.
private final ChainedMap.M5<ValueStatus, String, String, String, Boolean> valueStatusInfo
= ChainedMap.of(new TreeMap(), new TreeMap(), new TreeMap(), new TreeMap(), Boolean.class);
// Paths already handled by checkPath. NOTE(review): plain HashSet, accessed without
// synchronization - confirm checkPath is never called from multiple threads.
private final Set<String> seen = new HashSet<>();
// element -> attribute -> attribute value combinations already checked by checkAttribute.
private final Map<String,Map<String,Map<String,Boolean>>> seenEAV = new ConcurrentHashMap<>();
// Test object used for logging; callers also synchronize on it around log output.
private final TestFmwk testLog;
// DTD data used to look up the status of each attribute value.
private final DtdData dtdData;
// (element, attribute) pairs returned by dtdData.getNonEnumerated; only these are checked.
private final Multimap<String, String> needsTesting;
// Immutable copy of the map handed to getNonEnumerated; show() looks it up with the key
// element + TAB + attribute to get the "match expression" column.
private final Map<String,String> matchValues;

// Totals reported by show(); updated from checkFile.
private final AtomicInteger fileCount = new AtomicInteger();
private final AtomicInteger elementCount = new AtomicInteger();
private final AtomicInteger attributeCount = new AtomicInteger();
230 
PathChecker(TestFmwk testLog, DtdData dtdData)231         public PathChecker(TestFmwk testLog, DtdData dtdData) {
232             this.testLog = testLog;
233             this.dtdData = dtdData;
234             Map<String,String> _matchValues = new TreeMap<>();
235             needsTesting = dtdData.getNonEnumerated(_matchValues);
236             matchValues = ImmutableMap.copyOf(_matchValues);
237         }
238 
checkPath(String path)239         private void checkPath(String path) {
240             if (seen.contains(path)) {
241                 return;
242             }
243             seen.add(path);
244             if (path.contains("length-point")) {
245                 int debug = 0;
246             }
247             XPathParts parts = XPathParts.getFrozenInstance(path);
248             for (int elementIndex = 0; elementIndex < parts.size(); ++elementIndex) {
249                 String element = parts.getElement(elementIndex);
250                 for (Entry<String, String> entry : parts.getAttributes(elementIndex).entrySet()) {
251                     String attribute = entry.getKey();
252                     String attrValue = entry.getValue();
253                     checkAttribute(element, attribute, attrValue);
254                 }
255             }
256         }
257 
checkElement(String element, Attributes atts)258         public void checkElement(String element, Attributes atts) {
259             int length = atts.getLength();
260             for (int i = 0; i < length; ++i) {
261                 checkAttribute(element, atts.getQName(i), atts.getValue(i));
262             }
263         }
264 
checkAttribute(String element, String attribute, String attrValue)265         private void checkAttribute(String element, String attribute, String attrValue) {
266             // skip cases we know we don't need to test
267             if (!needsTesting.containsEntry(element, attribute)) {
268                 return;
269             }
270             // check if we've seen the EAV yet
271             // we don't need to synchronize because a miss isn't serious
272             Map<String, Map<String, Boolean>> sub = seenEAV.get(element);
273             if (sub == null) {
274                 Map<String, Map<String, Boolean>> subAlready = seenEAV.putIfAbsent(element, sub = new ConcurrentHashMap<>());
275                 if (subAlready != null) {
276                     sub = subAlready; // discards empty map
277                 }
278             }
279             Map<String, Boolean> set = sub.get(attribute);
280             if (set == null) {
281                 Map<String, Boolean> setAlready = sub.putIfAbsent(attribute, set = new ConcurrentHashMap<>());
282                 if (setAlready != null) {
283                     set = setAlready; // discards empty map
284                 }
285             }
286             if (set.putIfAbsent(attrValue, Boolean.TRUE) != null) {
287                 return;
288             };
289 
290             // get the status & store
291             ValueStatus valueStatus = dtdData.getValueStatus(element, attribute, attrValue);
292             if (valueStatus != ValueStatus.valid) {
293                 // Set breakpoint here for debugging (referenced from http://cldr.unicode.org/development/testattributevalues)
294                 dtdData.getValueStatus(element, attribute, attrValue);
295             }
296             synchronized (valueStatusInfo) {
297                 valueStatusInfo.put(valueStatus, element, attribute, attrValue, Boolean.TRUE);
298             }
299         }
300 
show(boolean verbose, ImmutableSet<ValueStatus> retain)301         void show(boolean verbose, ImmutableSet<ValueStatus> retain) {
302             boolean haveProblems = false;
303 //          if (testLog.logKnownIssue("cldrbug 10120", "Don't enable error until complete")) {
304 //              testLog.warnln("Counts: " + counter.toString());
305 //          } else
306             for (ValueStatus valueStatus : ValueStatus.values()) {
307                 if (valueStatus == ValueStatus.valid) {
308                     continue;
309                 }
310                 M4<String, String, String, Boolean> info = valueStatusInfo.get(valueStatus);
311                 if (info != null) {
312                     haveProblems = true;
313                 }
314             }
315 
316             if (!verbose && !haveProblems) {
317                 return;
318             }
319             StringBuilder out = new StringBuilder();
320             out.append("\nIf the test fails, look at http://cldr.unicode.org/development/testattributevalues\n");
321 
322             out.append("file\tCount:\t" + dtdData.dtdType + "\t" + fileCount + "\n");
323             out.append("element\tCount:\t" + dtdData.dtdType + "\t" + elementCount + "\n");
324             out.append("attribute\tCount:\t" + dtdData.dtdType + "\t" + attributeCount + "\n");
325 
326             out.append("\nStatus\tDtdType\tElement\tAttribute\tMatch expression\t#Failures\tFailing values\n");
327 
328             for (Entry<ValueStatus, Map<String, Map<String, Map<String, Boolean>>>> entry : valueStatusInfo) {
329                 ValueStatus valueStatus = entry.getKey();
330                 if (retain != null && !retain.contains(valueStatus)) {
331                     continue;
332                 }
333                 if (!verbose && haveProblems && valueStatus == ValueStatus.valid) {
334                     continue;
335                 }
336                 for (Entry<String, Map<String, Map<String, Boolean>>> entry2 : entry.getValue().entrySet()) {
337                     String elementName = entry2.getKey();
338                     for (Entry<String, Map<String, Boolean>> entry3 : entry2.getValue().entrySet()) {
339                         String attributeName = entry3.getKey();
340                         Set<String> validFound = entry3.getValue().keySet();
341                         String matchValue = matchValues.get(elementName + "\t" + attributeName);
342                         out.append(
343                             valueStatus
344                             + "\t" + dtdData.dtdType
345                             + "\t" + elementName
346                             + "\t" + attributeName
347                             + "\t" + (matchValue == null ? "" : matchValue)
348                             + "\t" + validFound.size()
349                             + "\t" + Joiner.on(", ").join(validFound)
350                             + "\n"
351                             );
352                         if (valueStatus == ValueStatus.valid) try {
353                             LstrType lstr = LstrType.fromString(elementName);
354                             Map<String, Validity.Status> codeToStatus = VALIDITY.getCodeToStatus(lstr);
355                             Set<String> missing = new TreeSet<>(codeToStatus.keySet());
356                             if (lstr == LstrType.variant) {
357                                 for (String item : validFound) {
358                                     missing.remove(item.toLowerCase(Locale.ROOT));
359                                 }
360                             } else {
361                                 missing.removeAll(validFound);
362                             }
363                             Set<String> deprecated = VALIDITY.getStatusToCodes(lstr).get(LstrField.Deprecated);
364                             if (deprecated != null) {
365                                 missing.removeAll(deprecated);
366                             }
367                             if (!missing.isEmpty()) {
368                                 out.append(
369                                     "unused"
370                                         + "\t" + dtdData.dtdType
371                                         + "\t" + elementName
372                                         + "\t" + attributeName
373                                         + "\t" + ""
374                                         + "\t" + ""
375                                         + "\t" + Joiner.on(", ").join(missing)
376                                         + "\n"
377                                     );
378                             }
379                         } catch (Exception e) {}
380                     }
381                 }
382             }
383             synchronized (testLog) {
384                 testLog.errln(out.toString());
385             }
386         }
387     }
388 
xTestA()389     public void xTestA() {
390         MatcherPattern mp = AttributeValueValidity.getMatcherPattern("$language");
391         for (String language : LanguageInfo.getAvailable()) {
392             if (mp.matches(language, null)) {
393                 LanguageInfo languageInfo = LanguageInfo.get(language);
394                 show(language, languageInfo);
395             }
396         }
397     }
398 
show(String language, LanguageInfo languageInfo)399     private void show(String language, LanguageInfo languageInfo) {
400         logln(language
401             + "\t" + config.getEnglish().getName(CLDRFile.LANGUAGE_NAME, language)
402             + "\t" + languageInfo);
403     }
404 
405 //    public void TestAttributeValueValidity() {
406 //        for (String test : Arrays.asList(
407 //            "supplementalData;     territoryAlias;     replacement;    AA")) {
408 //            quickTest(test);
409 //        }
410 //    }
411 
quickTest(String test)412     private Status quickTest(String test) {
413         List<String> parts = SEMI_SPACE.splitToList(test);
414         Output<String> reason = new Output<>();
415         Status value = AttributeValueValidity.check(DtdData.getInstance(DtdType.valueOf(parts.get(0))), parts.get(1), parts.get(2), parts.get(3), reason);
416         if (value != Status.ok) {
417             errln(test + "\t" + value + "\t" + reason);
418         }
419         return value;
420     }
421 
oldTestSingleFile()422     public void oldTestSingleFile() {
423         Errors errors = new Errors();
424         Set<AttributeValueSpec> missing = new TreeSet<>();
425         VerifyAttributeValues.check(CLDRPaths.MAIN_DIRECTORY + "en.xml", errors, missing);
426         for (AttributeValueSpec entry1 : missing) {
427             errln("Missing Tests: " + entry1);
428         }
429         for (R3<String, AttributeValueSpec, String> item : errors.getRows()) {
430             errln(item.get0() + "; \t" + item.get2() + "; \t" + item.get1());
431         }
432     }
433 
oldTestCoreValidity()434     public void oldTestCoreValidity() {
435         int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE;
436         Matcher fileMatcher = null;
437         Set<AttributeValueSpec> missing = new LinkedHashSet<>();
438         Errors errors = new Errors();
439         VerifyAttributeValues.findAttributeValues(BASE_DIR, maxPerDirectory, fileMatcher, errors, missing, isVerbose() ? getErrorLogPrintWriter() : null);
440 
441         int count = 0;
442         for (Entry<AttributeValidityInfo, String> entry : AttributeValueValidity.getReadFailures().entrySet()) {
443             errln("Read error: " + ++count + "\t" + entry.getKey() + " => " + entry.getValue());
444         }
445 
446         count = 0;
447         for (R3<DtdType, String, String> entry1 : AttributeValueValidity.getTodoTests()) {
448             warnln("Unfinished Test: " + ++count + "\t" + new AttributeValueSpec(entry1.get0(), entry1.get1(), entry1.get2(), "").toString());
449         }
450 
451         count = 0;
452         for (AttributeValueSpec entry1 : missing) {
453             errln("Missing Test: " + entry1);
454         }
455 
456         count = 0;
457         for (R3<String, AttributeValueSpec, String> item : errors.getRows()) {
458             if ("deprecated".equals(item.get2()))
459                 errln("Deprecated: " + ++count
460                     + "; \t" + item.get0()
461                     + "; \t" + item.get1().type
462                     + "; \t" + item.get1().element
463                     + "; \t" + item.get1().attribute
464                     + "; \t" + item.get1().attributeValue
465                     + "; \t" + item.get2());
466         }
467 
468         count = 0;
469         for (R3<String, AttributeValueSpec, String> item : errors.getRows()) {
470             if (!"deprecated".equals(item.get2()))
471                 errln("Invalid: " + ++count
472                     + "; \t" + item.get0()
473                     + "; \t" + item.get1().type
474                     + "; \t" + item.get1().element
475                     + "; \t" + item.get1().attribute
476                     + "; \t" + item.get1().attributeValue
477                     + "; \t" + item.get2());
478         }
479     }
480 }
481