1 package org.unicode.cldr.unittest; 2 3 import java.io.BufferedReader; 4 import java.io.File; 5 import java.io.FileInputStream; 6 import java.io.InputStream; 7 import java.io.InputStreamReader; 8 import java.nio.charset.Charset; 9 import java.util.Collections; 10 import java.util.HashSet; 11 import java.util.LinkedHashSet; 12 import java.util.List; 13 import java.util.Locale; 14 import java.util.Map; 15 import java.util.Map.Entry; 16 import java.util.Set; 17 import java.util.TreeMap; 18 import java.util.TreeSet; 19 import java.util.concurrent.ConcurrentHashMap; 20 import java.util.concurrent.atomic.AtomicInteger; 21 import java.util.regex.Matcher; 22 import java.util.stream.Collectors; 23 import java.util.stream.Stream; 24 25 import javax.xml.stream.XMLInputFactory; 26 import javax.xml.stream.XMLStreamConstants; 27 import javax.xml.stream.XMLStreamException; 28 import javax.xml.stream.XMLStreamReader; 29 30 import org.unicode.cldr.tool.VerifyAttributeValues; 31 import org.unicode.cldr.tool.VerifyAttributeValues.Errors; 32 import org.unicode.cldr.util.AttributeValueValidity; 33 import org.unicode.cldr.util.AttributeValueValidity.AttributeValueSpec; 34 import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern; 35 import org.unicode.cldr.util.AttributeValueValidity.Status; 36 import org.unicode.cldr.util.CLDRConfig; 37 import org.unicode.cldr.util.CLDRFile; 38 import org.unicode.cldr.util.CLDRPaths; 39 import org.unicode.cldr.util.ChainedMap; 40 import org.unicode.cldr.util.ChainedMap.M4; 41 import org.unicode.cldr.util.DtdData; 42 import org.unicode.cldr.util.DtdData.ValueStatus; 43 import org.unicode.cldr.util.DtdType; 44 import org.unicode.cldr.util.LanguageInfo; 45 import org.unicode.cldr.util.Organization; 46 import org.unicode.cldr.util.StandardCodes; 47 import org.unicode.cldr.util.StandardCodes.LstrField; 48 import org.unicode.cldr.util.StandardCodes.LstrType; 49 import org.unicode.cldr.util.StripUTF8BOMInputStream; 50 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo; 51 import org.unicode.cldr.util.Validity; 52 import org.unicode.cldr.util.XPathParts; 53 import org.xml.sax.Attributes; 54 55 import com.google.common.base.Joiner; 56 import com.google.common.base.Splitter; 57 import com.google.common.collect.ImmutableList; 58 import com.google.common.collect.ImmutableMap; 59 import com.google.common.collect.ImmutableSet; 60 import com.google.common.collect.ImmutableSortedSet; 61 import com.google.common.collect.Multimap; 62 import com.ibm.icu.dev.test.TestFmwk; 63 import com.ibm.icu.impl.Row.R3; 64 import com.ibm.icu.util.ICUException; 65 import com.ibm.icu.util.Output; 66 67 public class TestAttributeValues extends TestFmwk { 68 private static final boolean SERIAL = false; 69 70 private static final Validity VALIDITY = Validity.getInstance(); 71 private static final File BASE_DIR = new File(CLDRPaths.BASE_DIRECTORY); 72 public static final Joiner SPACE_JOINER = Joiner.on(' '); 73 public static final Splitter SPACE_SPLITTER = Splitter.on(' ').trimResults().omitEmptyStrings(); 74 static final Splitter SEMI_SPACE = Splitter.on(';').trimResults().omitEmptyStrings(); 75 private static final CLDRConfig config = CLDRConfig.getInstance(); 76 77 static final List<String> COMMON_AND_SEED = ImmutableList.of(CLDRPaths.COMMON_DIRECTORY, CLDRPaths.SEED_DIRECTORY); 78 main(String[] args)79 public static void main(String[] args) { 80 new TestAttributeValues().run(args); 81 } 82 TestValid()83 public void TestValid() { 84 String dtdTypeArg = params.props == null ? null : (String) params.props.get("dtdtype"); 85 86 // short- circuits for testing. null means do all 87 Set<DtdType> checkTypes = dtdTypeArg == null ? DtdType.STANDARD_SET 88 : Collections.singleton(DtdType.valueOf(dtdTypeArg)) ; 89 ImmutableSet<ValueStatus> showStatuses = null ; // ImmutableSet.of(ValueStatus.invalid, ValueStatus.unknown); 90 91 for (DtdType dtdType : checkTypes) { 92 PathChecker pathChecker = new PathChecker(this, DtdData.getInstance(dtdType)); 93 for (String mainDirs : COMMON_AND_SEED) { 94 Set<String> files = new TreeSet<>(); 95 for (String stringDir : dtdType.directories) { 96 addXMLFiles(dtdType, mainDirs + stringDir, files); 97 if (isVerbose()) 98 synchronized (pathChecker.testLog) { 99 warnln(mainDirs + stringDir); 100 } 101 } 102 Stream<String> stream = SERIAL ? files.stream() : files.parallelStream(); 103 stream.forEach(file -> checkFile(pathChecker, file)); 104 105 // for (String file : files) { 106 // checkFile(pathChecker, file); 107 // } 108 } 109 pathChecker.show(isVerbose(), showStatuses); 110 } 111 // List<String> localesToTest = Arrays.asList("en", "root"); // , "zh", "hi", "ja", "ru", "cy" 112 // Set<String> localesToTest = config.getCommonAndSeedAndMainAndAnnotationsFactory().getAvailable(); 113 // // TODO, add all other files 114 115 // for (String locale : localesToTest) { 116 // CLDRFile file = config.getCLDRFile(locale, false); 117 // for (String dpath : file) { 118 // String path = file.getFullXPath(dpath); 119 // pathChecker.checkPath(path); 120 // } 121 // } 122 } 123 124 125 static final Set<String> CLDR_LOCALES = ImmutableSortedSet.copyOf(StandardCodes.make() 126 .getLocaleCoverageLocales(Organization.cldr) 127 .stream() 128 .map(x -> x + ".xml") 129 .collect(Collectors.toSet())); 130 addXMLFiles(DtdType dtdType, String path, Set<String> files)131 private void addXMLFiles(DtdType dtdType, String path, Set<String> files) { 132 File dirFile = new File(path); 133 if (!dirFile.exists()) { 134 return; 135 } 136 if (!dirFile.isDirectory()) { 137 if (getInclusion() <= 5 138 && dtdType == DtdType.ldml) { 139 if (path.contains("/annotationsDerived/")) { 140 return; 141 } 142 String ending = path.substring(path.lastIndexOf('/')+1); 143 if (!CLDR_LOCALES.contains(ending)) { 144 return; 145 } 146 } 147 files.add(path); 148 } else { 149 for (String file : dirFile.list()) { 150 addXMLFiles(dtdType, path + "/" + file, files); 151 } 152 } 153 } 154 155 checkFile(PathChecker pathChecker, String fullFile)156 private void checkFile(PathChecker pathChecker, String fullFile) { 157 if (!fullFile.endsWith(".xml")) { 158 return; 159 } 160 pathChecker.fileCount.incrementAndGet(); 161 // if (isVerbose()) synchronized (this) { 162 // logln(fullFile); 163 // } 164 XMLInputFactory f = XMLInputFactory.newInstance(); 165 // XMLInputFactory f = XMLInputFactory.newFactory("org.apache.xerces.jaxp.SAXParserFactoryImpl", 166 // ClassLoader.getSystemClassLoader()); 167 168 int _elementCount = 0; 169 int _attributeCount = 0; 170 171 try { 172 // should convert these over to new io. 173 try (InputStream fis0 = new FileInputStream(fullFile); 174 InputStream fis = new StripUTF8BOMInputStream(fis0); 175 InputStreamReader inputStreamReader = new InputStreamReader(fis, Charset.forName("UTF-8")); 176 BufferedReader bufferedReader = new BufferedReader(inputStreamReader); 177 ) { 178 XMLStreamReader r = f.createXMLStreamReader(fullFile, bufferedReader); 179 String element = null; 180 while(r.hasNext()) { 181 try { 182 switch(r.next()){ 183 case XMLStreamConstants.START_ELEMENT: 184 element = r.getLocalName(); 185 ++_elementCount; 186 int attributeSize = r.getAttributeCount(); 187 for (int i = 0; i < attributeSize; ++i) { 188 ++_attributeCount; 189 String attribute = r.getAttributeLocalName(i); 190 String attributeValue = r.getAttributeValue(i); 191 pathChecker.checkAttribute(element, attribute, attributeValue); 192 } 193 break; 194 } 195 } catch (XMLStreamException e) { 196 synchronized (pathChecker.testLog) { 197 pathChecker.testLog.errln(fullFile + "error"); 198 } 199 e.printStackTrace(pathChecker.testLog.getLogPrintWriter()); 200 } 201 } 202 //XMLFileReader.read("noId", inputStreamReader, -1, true, myHandler); 203 } catch (XMLStreamException e) { 204 if (!logKnownIssue("cldrbug 10120", "XML reading issue")) { 205 warnln("Can't read " + fullFile); 206 } else { 207 throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fullFile).initCause(e); 208 } 209 } 210 } catch (Exception e) { 211 throw new ICUException(fullFile, e); 212 } 213 pathChecker.elementCount.addAndGet(_elementCount); 214 pathChecker.attributeCount.addAndGet(_attributeCount); 215 } 216 217 static class PathChecker { 218 private final ChainedMap.M5<ValueStatus, String, String, String, Boolean> valueStatusInfo 219 = ChainedMap.of(new TreeMap(), new TreeMap(), new TreeMap(), new TreeMap(), Boolean.class); 220 private final Set<String> seen = new HashSet<>(); 221 private final Map<String,Map<String,Map<String,Boolean>>> seenEAV = new ConcurrentHashMap<>(); 222 private final TestFmwk testLog; 223 private final DtdData dtdData; 224 private final Multimap<String, String> needsTesting; 225 private final Map<String,String> matchValues; 226 227 private final AtomicInteger fileCount = new AtomicInteger(); 228 private final AtomicInteger elementCount = new AtomicInteger(); 229 private final AtomicInteger attributeCount = new AtomicInteger(); 230 PathChecker(TestFmwk testLog, DtdData dtdData)231 public PathChecker(TestFmwk testLog, DtdData dtdData) { 232 this.testLog = testLog; 233 this.dtdData = dtdData; 234 Map<String,String> _matchValues = new TreeMap<>(); 235 needsTesting = dtdData.getNonEnumerated(_matchValues); 236 matchValues = ImmutableMap.copyOf(_matchValues); 237 } 238 checkPath(String path)239 private void checkPath(String path) { 240 if (seen.contains(path)) { 241 return; 242 } 243 seen.add(path); 244 if (path.contains("length-point")) { 245 int debug = 0; 246 } 247 XPathParts parts = XPathParts.getFrozenInstance(path); 248 for (int elementIndex = 0; elementIndex < parts.size(); ++elementIndex) { 249 String element = parts.getElement(elementIndex); 250 for (Entry<String, String> entry : parts.getAttributes(elementIndex).entrySet()) { 251 String attribute = entry.getKey(); 252 String attrValue = entry.getValue(); 253 checkAttribute(element, attribute, attrValue); 254 } 255 } 256 } 257 checkElement(String element, Attributes atts)258 public void checkElement(String element, Attributes atts) { 259 int length = atts.getLength(); 260 for (int i = 0; i < length; ++i) { 261 checkAttribute(element, atts.getQName(i), atts.getValue(i)); 262 } 263 } 264 checkAttribute(String element, String attribute, String attrValue)265 private void checkAttribute(String element, String attribute, String attrValue) { 266 // skip cases we know we don't need to test 267 if (!needsTesting.containsEntry(element, attribute)) { 268 return; 269 } 270 // check if we've seen the EAV yet 271 // we don't need to synchronize because a miss isn't serious 272 Map<String, Map<String, Boolean>> sub = seenEAV.get(element); 273 if (sub == null) { 274 Map<String, Map<String, Boolean>> subAlready = seenEAV.putIfAbsent(element, sub = new ConcurrentHashMap<>()); 275 if (subAlready != null) { 276 sub = subAlready; // discards empty map 277 } 278 } 279 Map<String, Boolean> set = sub.get(attribute); 280 if (set == null) { 281 Map<String, Boolean> setAlready = sub.putIfAbsent(attribute, set = new ConcurrentHashMap<>()); 282 if (setAlready != null) { 283 set = setAlready; // discards empty map 284 } 285 } 286 if (set.putIfAbsent(attrValue, Boolean.TRUE) != null) { 287 return; 288 }; 289 290 // get the status & store 291 ValueStatus valueStatus = dtdData.getValueStatus(element, attribute, attrValue); 292 if (valueStatus != ValueStatus.valid) { 293 // Set breakpoint here for debugging (referenced from http://cldr.unicode.org/development/testattributevalues) 294 dtdData.getValueStatus(element, attribute, attrValue); 295 } 296 synchronized (valueStatusInfo) { 297 valueStatusInfo.put(valueStatus, element, attribute, attrValue, Boolean.TRUE); 298 } 299 } 300 show(boolean verbose, ImmutableSet<ValueStatus> retain)301 void show(boolean verbose, ImmutableSet<ValueStatus> retain) { 302 boolean haveProblems = false; 303 // if (testLog.logKnownIssue("cldrbug 10120", "Don't enable error until complete")) { 304 // testLog.warnln("Counts: " + counter.toString()); 305 // } else 306 for (ValueStatus valueStatus : ValueStatus.values()) { 307 if (valueStatus == ValueStatus.valid) { 308 continue; 309 } 310 M4<String, String, String, Boolean> info = valueStatusInfo.get(valueStatus); 311 if (info != null) { 312 haveProblems = true; 313 } 314 } 315 316 if (!verbose && !haveProblems) { 317 return; 318 } 319 StringBuilder out = new StringBuilder(); 320 out.append("\nIf the test fails, look at http://cldr.unicode.org/development/testattributevalues\n"); 321 322 out.append("file\tCount:\t" + dtdData.dtdType + "\t" + fileCount + "\n"); 323 out.append("element\tCount:\t" + dtdData.dtdType + "\t" + elementCount + "\n"); 324 out.append("attribute\tCount:\t" + dtdData.dtdType + "\t" + attributeCount + "\n"); 325 326 out.append("\nStatus\tDtdType\tElement\tAttribute\tMatch expression\t#Failures\tFailing values\n"); 327 328 for (Entry<ValueStatus, Map<String, Map<String, Map<String, Boolean>>>> entry : valueStatusInfo) { 329 ValueStatus valueStatus = entry.getKey(); 330 if (retain != null && !retain.contains(valueStatus)) { 331 continue; 332 } 333 if (!verbose && haveProblems && valueStatus == ValueStatus.valid) { 334 continue; 335 } 336 for (Entry<String, Map<String, Map<String, Boolean>>> entry2 : entry.getValue().entrySet()) { 337 String elementName = entry2.getKey(); 338 for (Entry<String, Map<String, Boolean>> entry3 : entry2.getValue().entrySet()) { 339 String attributeName = entry3.getKey(); 340 Set<String> validFound = entry3.getValue().keySet(); 341 String matchValue = matchValues.get(elementName + "\t" + attributeName); 342 out.append( 343 valueStatus 344 + "\t" + dtdData.dtdType 345 + "\t" + elementName 346 + "\t" + attributeName 347 + "\t" + (matchValue == null ? "" : matchValue) 348 + "\t" + validFound.size() 349 + "\t" + Joiner.on(", ").join(validFound) 350 + "\n" 351 ); 352 if (valueStatus == ValueStatus.valid) try { 353 LstrType lstr = LstrType.fromString(elementName); 354 Map<String, Validity.Status> codeToStatus = VALIDITY.getCodeToStatus(lstr); 355 Set<String> missing = new TreeSet<>(codeToStatus.keySet()); 356 if (lstr == LstrType.variant) { 357 for (String item : validFound) { 358 missing.remove(item.toLowerCase(Locale.ROOT)); 359 } 360 } else { 361 missing.removeAll(validFound); 362 } 363 Set<String> deprecated = VALIDITY.getStatusToCodes(lstr).get(LstrField.Deprecated); 364 if (deprecated != null) { 365 missing.removeAll(deprecated); 366 } 367 if (!missing.isEmpty()) { 368 out.append( 369 "unused" 370 + "\t" + dtdData.dtdType 371 + "\t" + elementName 372 + "\t" + attributeName 373 + "\t" + "" 374 + "\t" + "" 375 + "\t" + Joiner.on(", ").join(missing) 376 + "\n" 377 ); 378 } 379 } catch (Exception e) {} 380 } 381 } 382 } 383 synchronized (testLog) { 384 testLog.errln(out.toString()); 385 } 386 } 387 } 388 xTestA()389 public void xTestA() { 390 MatcherPattern mp = AttributeValueValidity.getMatcherPattern("$language"); 391 for (String language : LanguageInfo.getAvailable()) { 392 if (mp.matches(language, null)) { 393 LanguageInfo languageInfo = LanguageInfo.get(language); 394 show(language, languageInfo); 395 } 396 } 397 } 398 show(String language, LanguageInfo languageInfo)399 private void show(String language, LanguageInfo languageInfo) { 400 logln(language 401 + "\t" + config.getEnglish().getName(CLDRFile.LANGUAGE_NAME, language) 402 + "\t" + languageInfo); 403 } 404 405 // public void TestAttributeValueValidity() { 406 // for (String test : Arrays.asList( 407 // "supplementalData; territoryAlias; replacement; AA")) { 408 // quickTest(test); 409 // } 410 // } 411 quickTest(String test)412 private Status quickTest(String test) { 413 List<String> parts = SEMI_SPACE.splitToList(test); 414 Output<String> reason = new Output<>(); 415 Status value = AttributeValueValidity.check(DtdData.getInstance(DtdType.valueOf(parts.get(0))), parts.get(1), parts.get(2), parts.get(3), reason); 416 if (value != Status.ok) { 417 errln(test + "\t" + value + "\t" + reason); 418 } 419 return value; 420 } 421 oldTestSingleFile()422 public void oldTestSingleFile() { 423 Errors errors = new Errors(); 424 Set<AttributeValueSpec> missing = new TreeSet<>(); 425 VerifyAttributeValues.check(CLDRPaths.MAIN_DIRECTORY + "en.xml", errors, missing); 426 for (AttributeValueSpec entry1 : missing) { 427 errln("Missing Tests: " + entry1); 428 } 429 for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { 430 errln(item.get0() + "; \t" + item.get2() + "; \t" + item.get1()); 431 } 432 } 433 oldTestCoreValidity()434 public void oldTestCoreValidity() { 435 int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE; 436 Matcher fileMatcher = null; 437 Set<AttributeValueSpec> missing = new LinkedHashSet<>(); 438 Errors errors = new Errors(); 439 VerifyAttributeValues.findAttributeValues(BASE_DIR, maxPerDirectory, fileMatcher, errors, missing, isVerbose() ? getErrorLogPrintWriter() : null); 440 441 int count = 0; 442 for (Entry<AttributeValidityInfo, String> entry : AttributeValueValidity.getReadFailures().entrySet()) { 443 errln("Read error: " + ++count + "\t" + entry.getKey() + " => " + entry.getValue()); 444 } 445 446 count = 0; 447 for (R3<DtdType, String, String> entry1 : AttributeValueValidity.getTodoTests()) { 448 warnln("Unfinished Test: " + ++count + "\t" + new AttributeValueSpec(entry1.get0(), entry1.get1(), entry1.get2(), "").toString()); 449 } 450 451 count = 0; 452 for (AttributeValueSpec entry1 : missing) { 453 errln("Missing Test: " + entry1); 454 } 455 456 count = 0; 457 for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { 458 if ("deprecated".equals(item.get2())) 459 errln("Deprecated: " + ++count 460 + "; \t" + item.get0() 461 + "; \t" + item.get1().type 462 + "; \t" + item.get1().element 463 + "; \t" + item.get1().attribute 464 + "; \t" + item.get1().attributeValue 465 + "; \t" + item.get2()); 466 } 467 468 count = 0; 469 for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { 470 if (!"deprecated".equals(item.get2())) 471 errln("Invalid: " + ++count 472 + "; \t" + item.get0() 473 + "; \t" + item.get1().type 474 + "; \t" + item.get1().element 475 + "; \t" + item.get1().attribute 476 + "; \t" + item.get1().attributeValue 477 + "; \t" + item.get2()); 478 } 479 } 480 } 481